From 29979b64a8149626371b4219f61db24fe5bfea07 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Tue, 4 Nov 2014 11:16:09 +0000 Subject: [PATCH 001/207] bump ftw to 0.0.40 --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index e22bafc8..3bf7341a 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -22,7 +22,7 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0' - s.add_runtime_dependency 'ftw', ['~> 0.0.39'] + s.add_runtime_dependency 'ftw', ['~> 0.0.40'] s.add_runtime_dependency 'logstash-codec-json' end From a226653a1ca6a28700c2d24616d0e0f7eb1be661 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 4 Nov 2014 18:08:55 -0500 Subject: [PATCH 002/207] Add Apache 2.0 License --- LICENSE | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..f8b711d5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2012-2014 Elasticsearch + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. From 2bb8c66a2167af6f268d00ba5e8a438c82236e99 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Wed, 5 Nov 2014 16:29:35 -0800 Subject: [PATCH 003/207] Add ruby formatting for asciidoc support Fixes #3 --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index d34630ea..dc31342b 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -8,7 +8,7 @@ # This is useful for replaying test logs, reindexing, etc. # # Example: -# +# [source,ruby] # input { # # Read all documents from Elasticsearch matching the given query # elasticsearch { From add0954a1f5d5227060dc3adc63e9919361b80ac Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 6 Nov 2014 11:50:24 +0000 Subject: [PATCH 004/207] Change the implementation to use elasticsearch-ruby Also adds ssl and basic auth capability to the plugin Fixes #4 --- lib/logstash/inputs/elasticsearch.rb | 122 ++++++++++----------- logstash-input-elasticsearch.gemspec | 3 +- spec/inputs/elasticsearch_spec.rb | 156 +++++++++++++++++---------- 3 files changed, 158 insertions(+), 123 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index dc31342b..1c9402d6 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -1,8 +1,7 @@ # encoding: utf-8 require "logstash/inputs/base" require "logstash/namespace" -require "logstash/util/socket_peer" -require "logstash/json" +require "base64" # Read from an Elasticsearch cluster, based on search query results. # This is useful for replaying test logs, reindexing, etc. 
@@ -13,7 +12,7 @@ # # Read all documents from Elasticsearch matching the given query # elasticsearch { # host => "localhost" -# query => "ERROR" +# query => '{ "query": { "match": { "_id": "C5b2xLQwTZa76jBmHIbwHQ" } }' # } # } # @@ -28,8 +27,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base default :codec, "json" - # The IP address or hostname of your Elasticsearch server. - config :host, :validate => :string, :required => true + # List of elasticsearch hosts to use for querying. + config :hosts, :validate => :array # The HTTP port of your Elasticsearch server's REST interface. config :port, :validate => :number, :default => 9200 @@ -52,83 +51,74 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # round trip (i.e. between the previous scan scroll request, to the next). config :scroll, :validate => :string, :default => "1m" + # Basic Auth - username + config :user, :validate => :string + + # Basic Auth - password + config :password, :validate => :password + + # SSL + config :ssl, :validate => :boolean, :default => false + + # SSL Certificate Authority file + config :ca_file, :validate => :path + public def register - require "ftw" - @agent = FTW::Agent.new + require "elasticsearch" - params = { - "q" => @query, - "scroll" => @scroll, - "size" => "#{@size}", + @options = { + index: @index, + body: @query, + scroll: @scroll, + size: @size } - params['search_type'] = "scan" if @scan - @search_url = "http://#{@host}:#{@port}/#{@index}/_search?#{encode(params)}" - @scroll_url = "http://#{@host}:#{@port}/_search/scroll?#{encode({"scroll" => @scroll})}" - end # def register + @options[:search_type] = 'scan' if @scan - private - def encode(hash) - return hash.collect do |key, value| - CGI.escape(key) + "=" + CGI.escape(value) - end.join("&") - end # def encode + transport_options = {} - private - def execute_search_request - response = @agent.get!(@search_url) - json = "" - response.read_body { |c| json << c } - json - end + if @user && @password + token = Base64.strict_encode64("#{@user}:#{@password.value}") + transport_options[:headers] = { Authorization: "Basic #{token}" } + end - private - def execute_scroll_request(scroll_id) - response = @agent.post!(@scroll_url, :body => scroll_id) - json = "" - response.read_body { |c| json << c } - json - end + hosts = if @ssl then + @hosts.map {|h| { host: h, scheme: 'https' } } + else + @hosts + end + + if @ssl && @ca_file + transport_options[:ssl] = { ca_file: @ca_file } + end + + @client = Elasticsearch::Client.new hosts: hosts, transport_options: transport_options + + end # def register public def run(output_queue) - result = LogStash::Json.load(execute_search_request) - scroll_id = result["_scroll_id"] - # When using the search_type=scan we don't get an initial result set. - # So we do it here. + # get first wave of data + r = @client.search @options + + # since 'scan' doesn't return data on the search call, do an extra scroll if @scan - result = LogStash::Json.load(execute_scroll_request(scroll_id)) + r = scroll_request(r['_scroll_id']) end - loop do - break if result.nil? - hits = result["hits"]["hits"] - break if hits.empty? - - hits.each do |hit| - # Hack to make codecs work - @codec.decode(LogStash::Json.dump(hit["_source"])) do |event| - decorate(event) - output_queue << event - end + while r['hits']['hits'].any? 
do + r['hits']['hits'].each do |event| + decorate(event) + output_queue << event end - - # Get the scroll id from the previous result set and use it for getting the next data set - scroll_id = result["_scroll_id"] - - # Fetch the next result set - result = LogStash::Json.load(execute_scroll_request(scroll_id)) - - if result["error"] - @logger.warn(result["error"], :request => scroll_url) - # TODO(sissel): raise an error instead of breaking - break - end - + r = scroll_request(r['_scroll_id']) end - rescue LogStash::ShutdownSignal - # Do nothing, let us quit. end # def run + + private + def scroll_request scroll_id + @client.scroll(body: scroll_id, scroll: @scroll) + end end # class LogStash::Inputs::Elasticsearch diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 3bf7341a..76f0d8e8 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -22,8 +22,7 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0' - s.add_runtime_dependency 'ftw', ['~> 0.0.40'] - s.add_runtime_dependency 'logstash-codec-json' + s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 4eb0f456..8fedeaf0 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1,60 +1,115 @@ require "spec_helper" require "logstash/inputs/elasticsearch" +require "elasticsearch" describe "inputs/elasticsearch" do - - - search_response = <<-RESPONSE - { - "_scroll_id":"xxx", - "took":5, - "timed_out":false, - "_shards":{"total":15,"successful":15,"failed":0}, - "hits":{ - "total":1000050, - "max_score":1.0, - "hits":[ - { - "_index":"logstash2", - "_type":"logs", - "_id":"AmaqL7VuSWKF-F6N_Gz72g", - "_score":1.0, - "_source" : { - "message":"foobar", - "@version":"1", - "@timestamp":"2014-05-19T21:08:39.000Z", - "host":"colin-mbp13r" - } - } - ] + + it "should retrieve json event from elasticseach" do + + config = %q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } } - } - RESPONSE + ] - scroll_response = <<-RESPONSE - { - "hits":{ - "hits":[] + response = { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "fields" => {"message" => ["ohayo"] } + } ] } } - RESPONSE - config <<-CONFIG - input { - elasticsearch { - host => "localhost" - scan => false + scroll_reponse = { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + + client = Elasticsearch::Client.new + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:search).with(any_args).and_return(response) + expect(client).to receive(:scroll).with({:body=>"cXVlcnlUaGVuRmV0Y2g", :scroll=>"1m"}).and_return(scroll_reponse) + + pipeline = LogStash::Pipeline.new(config) + queue = Queue.new + pipeline.instance_eval do + @output_func = lambda { |event| queue << event } + end + pipeline_thread = Thread.new { pipeline.run } + event = queue.pop + + insist { event["fields"]["message"] } == [ "ohayo" ] + + pipeline_thread.join + end + + it "should retrieve 
json event from elasticseach with scan" do + + config = %q[ + input { + elasticsearch { + hosts => ["node01"] + scan => true + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } } + ] + + scan_response = { + "_scroll_id" => "DcrY3G1xff6SB", } - CONFIG - it "should retrieve json event from elasticseach" do - # I somewhat duplicated our "input" rspec extension because I needed to add mocks for the the actual ES calls - # and rspec expectations need to be in "it" statement but the "input" extension defines the "it" - # TODO(colin) see how we can improve our rspec extension to better integrate in these scenarios + scroll_responses = [ + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "fields" => {"message" => ["ohayo"] } + } ] + } + }, + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + ] - expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_search_request).and_return(search_response) - expect_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:execute_scroll_request).with(any_args).and_return(scroll_response) + client = Elasticsearch::Client.new + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:search).with(any_args).and_return(scan_response) + expect(client).to receive(:scroll).with({:body=>"DcrY3G1xff6SB", :scroll=>"1m"}).and_return(scroll_responses.first) + expect(client).to receive(:scroll).with({:body=>"cXVlcnlUaGVuRmV0Y2g", :scroll=>"1m"}).and_return(scroll_responses.last) pipeline = LogStash::Pipeline.new(config) queue = Queue.new @@ -64,17 +119,8 @@ pipeline_thread = Thread.new { pipeline.run } event = queue.pop - insist { event["message"] } == "foobar" - - # do not call pipeline.shutdown here, as it will stop the plugin execution randomly - # and maybe kill input before calling execute_scroll_request. - # TODO(colin) we should rework the pipeliene shutdown to allow a soft/clean shutdown mecanism, - # using a shutdown event which can be fed into each plugin queue and when the plugin sees it - # exits after completing its processing. 
- # - # pipeline.shutdown - # - # instead, since our scroll_response will terminate the plugin, we can just join the pipeline thread + insist { event["fields"]["message"] } == [ "ohayo" ] + pipeline_thread.join end end From c8b9bab43ca20e82890000a0cb592d07a3ffd8f2 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 6 Nov 2014 19:29:42 +0000 Subject: [PATCH 005/207] better example of a query in docs Fixes #4 --- lib/logstash/inputs/elasticsearch.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 1c9402d6..16da6e67 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -12,13 +12,13 @@ # # Read all documents from Elasticsearch matching the given query # elasticsearch { # host => "localhost" -# query => '{ "query": { "match": { "_id": "C5b2xLQwTZa76jBmHIbwHQ" } }' +# query => '{ "query": { "match": { "statuscode": 200 } } }' # } # } # # This would create an Elasticsearch query with the following format: # -# http://localhost:9200/logstash-*/_search?q=ERROR&scroll=1m&size=1000 +# http://localhost:9200/logstash-*/_search?q=%7B%22query%22:%7B%22match%22:%7B%22statuscode%22:200%7D%7D%7D&scroll=1m&size=1000 # # * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing? class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base From 0d90b2ee021cb8aceb60d19adfaa87915165b868 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Fri, 7 Nov 2014 16:08:15 +0000 Subject: [PATCH 006/207] remove encoding from example url Fixes #4 --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 16da6e67..93f24c83 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -18,7 +18,7 @@ # # This would create an Elasticsearch query with the following format: # -# http://localhost:9200/logstash-*/_search?q=%7B%22query%22:%7B%22match%22:%7B%22statuscode%22:200%7D%7D%7D&scroll=1m&size=1000 +# http://localhost:9200/logstash-*/_search?q='{ "query": { "match": { "statuscode": 200 } } }'&scroll=1m&size=1000 # # * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing? class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base From 3ae5b27d6f31bff17e1982533439be4f247c4ca7 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Fri, 7 Nov 2014 12:27:11 -0800 Subject: [PATCH 007/207] minor inline code asciidoc formatting --- lib/logstash/inputs/elasticsearch.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 93f24c83..1e531996 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -17,10 +17,10 @@ # } # # This would create an Elasticsearch query with the following format: -# +# [source,json] # http://localhost:9200/logstash-*/_search?q='{ "query": { "match": { "statuscode": 200 } } }'&scroll=1m&size=1000 # -# * TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing? +# TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing? 
class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config_name "elasticsearch" milestone 1 From 2810421c8a23702659832af4e1abd39e11f6e402 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 18 Nov 2014 18:35:54 -0500 Subject: [PATCH 008/207] Plugin structure change --- Gemfile | 5 +- Rakefile | 1 + logstash-input-elasticsearch.gemspec | 9 +- rakelib/publish.rake | 9 -- rakelib/vendor.rake | 169 --------------------------- spec/inputs/elasticsearch_spec.rb | 2 +- 6 files changed, 9 insertions(+), 186 deletions(-) delete mode 100644 rakelib/publish.rake delete mode 100644 rakelib/vendor.rake diff --git a/Gemfile b/Gemfile index ccb90122..c48c3d1e 100644 --- a/Gemfile +++ b/Gemfile @@ -1,4 +1,3 @@ source 'https://rubygems.org/' -gem 'rake' -gem 'gem_publisher' -gem 'archive-tar-minitar' +gemspec +gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5" diff --git a/Rakefile b/Rakefile index da6b220e..4f4b8586 100644 --- a/Rakefile +++ b/Rakefile @@ -4,3 +4,4 @@ task :default do system("rake -T") end +require "logstash/devutils/rake" diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 76f0d8e8..7c75d6f8 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,13 +1,13 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '0.1.0' + s.version = '0.1.1' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" - s.description = "Read from an Elasticsearch cluster, based on search query results" + s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" s.authors = ["Elasticsearch"] s.email = 'richard.pijnenburg@elasticsearch.com' - s.homepage = "http://logstash.net/" + s.homepage = "http://www.elasticsearch.org/guide/en/logstash/current/index.html" s.require_paths = ["lib"] # Files @@ -17,12 +17,13 @@ Gem::Specification.new do |s| s.test_files = s.files.grep(%r{^(test|spec|features)/}) # Special flag to let us know this is actually a logstash plugin - s.metadata = { "logstash_plugin" => "true", "group" => "input" } + s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0' s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] + s.add_development_dependency 'logstash-devutils' end diff --git a/rakelib/publish.rake b/rakelib/publish.rake deleted file mode 100644 index 0ef58c08..00000000 --- a/rakelib/publish.rake +++ /dev/null @@ -1,9 +0,0 @@ -require "gem_publisher" - -desc "Publish gem to RubyGems.org" -task :publish_gem do |t| - gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first - gem = GemPublisher.publish_if_updated(gem_file, :rubygems) - puts "Published #{gem}" if gem -end - diff --git a/rakelib/vendor.rake b/rakelib/vendor.rake deleted file mode 100644 index 2135119c..00000000 --- a/rakelib/vendor.rake +++ /dev/null @@ -1,169 +0,0 @@ -require "net/http" -require "uri" -require "digest/sha1" - -def vendor(*args) - return File.join("vendor", *args) -end - -directory "vendor/" => ["vendor"] do |task, args| - mkdir task.name -end - -def fetch(url, sha1, output) - - puts "Downloading #{url}" - actual_sha1 = download(url, output) - - if actual_sha1 != sha1 - fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')" - end -end # def fetch - -def file_fetch(url, sha1) - filename = File.basename( URI(url).path ) - output = "vendor/#{filename}" - task output => [ "vendor/" ] do - begin - actual_sha1 = file_sha1(output) - if actual_sha1 != sha1 - fetch(url, sha1, output) - end - rescue Errno::ENOENT - fetch(url, sha1, output) - end - end.invoke - - return output -end - -def file_sha1(path) - digest = Digest::SHA1.new - fd = File.new(path, "r") - while true - begin - digest << fd.sysread(16384) - rescue EOFError - break - end - end - return digest.hexdigest -ensure - fd.close if fd -end - -def download(url, output) - uri = URI(url) - digest = Digest::SHA1.new - tmp = "#{output}.tmp" - Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http| - request = Net::HTTP::Get.new(uri.path) - http.request(request) do |response| - fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code) - size = (response["content-length"].to_i || -1).to_f - count = 0 - File.open(tmp, "w") do |fd| - response.read_body do |chunk| - fd.write(chunk) - digest << chunk - if size > 0 && $stdout.tty? - count += chunk.bytesize - $stdout.write(sprintf("\r%0.2f%%", count/size * 100)) - end - end - end - $stdout.write("\r \r") if $stdout.tty? - end - end - - File.rename(tmp, output) - - return digest.hexdigest -rescue SocketError => e - puts "Failure while downloading #{url}: #{e}" - raise -ensure - File.unlink(tmp) if File.exist?(tmp) -end # def download - -def untar(tarball, &block) - require "archive/tar/minitar" - tgz = Zlib::GzipReader.new(File.open(tarball)) - # Pull out typesdb - tar = Archive::Tar::Minitar::Input.open(tgz) - tar.each do |entry| - path = block.call(entry) - next if path.nil? 
- parent = File.dirname(path) - - mkdir_p parent unless File.directory?(parent) - - # Skip this file if the output file is the same size - if entry.directory? - mkdir path unless File.directory?(path) - else - entry_mode = entry.instance_eval { @mode } & 0777 - if File.exists?(path) - stat = File.stat(path) - # TODO(sissel): Submit a patch to archive-tar-minitar upstream to - # expose headers in the entry. - entry_size = entry.instance_eval { @size } - # If file sizes are same, skip writing. - next if stat.size == entry_size && (stat.mode & 0777) == entry_mode - end - puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}" - File.open(path, "w") do |fd| - # eof? check lets us skip empty files. Necessary because the API provided by - # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an - # IO object. Something about empty files in this EntryStream causes - # IO.copy_stream to throw "can't convert nil into String" on JRuby - # TODO(sissel): File a bug about this. - while !entry.eof? - chunk = entry.read(16384) - fd.write(chunk) - end - #IO.copy_stream(entry, fd) - end - File.chmod(entry_mode, path) - end - end - tar.close - File.unlink(tarball) if File.file?(tarball) -end # def untar - -def ungz(file) - - outpath = file.gsub('.gz', '') - tgz = Zlib::GzipReader.new(File.open(file)) - begin - File.open(outpath, "w") do |out| - IO::copy_stream(tgz, out) - end - File.unlink(file) - rescue - File.unlink(outpath) if File.file?(outpath) - raise - end - tgz.close -end - -desc "Process any vendor files required for this plugin" -task "vendor" do |task, args| - - @files.each do |file| - download = file_fetch(file['url'], file['sha1']) - if download =~ /.tar.gz/ - prefix = download.gsub('.tar.gz', '').gsub('vendor/', '') - untar(download) do |entry| - if !file['files'].nil? 
-          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
-          out = entry.full_name.split("/").last
-        end
-        File.join('vendor', out)
-      end
-    elsif download =~ /.gz/
-      ungz(download)
-    end
-  end
-
-end
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 8fedeaf0..f4006303 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -1,4 +1,4 @@
-require "spec_helper"
+require "logstash/devutils/rspec/spec_helper"
 require "logstash/inputs/elasticsearch"
 require "elasticsearch"
 

From bd29374704712f94133900774839456be4e1b108 Mon Sep 17 00:00:00 2001
From: Suyog Rao
Date: Tue, 18 Nov 2014 17:01:24 -0800
Subject: [PATCH 009/207] Add run time dependency on json codec

Fixes #5
---
 logstash-input-elasticsearch.gemspec | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 7c75d6f8..53a3a136 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -24,6 +24,8 @@ Gem::Specification.new do |s|
 
   s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0']
 
+  s.add_runtime_dependency 'logstash-codec-json'
+
   s.add_development_dependency 'logstash-devutils'
 end

From 969be4c526397dcf43c21c8aae5dbf2c3f9805ec Mon Sep 17 00:00:00 2001
From: Pier-Hugues Pellerin
Date: Wed, 19 Nov 2014 09:44:01 -0500
Subject: [PATCH 010/207] Change email to info@elasticsearch.com

---
 logstash-input-elasticsearch.gemspec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 53a3a136..fd698a1d 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -6,7 +6,7 @@ Gem::Specification.new do |s|
   s.summary = "Read from an Elasticsearch cluster, based on search query results"
   s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
   s.authors = ["Elasticsearch"]
-  s.email = 'richard.pijnenburg@elasticsearch.com'
+  s.email = 'info@elasticsearch.com'
   s.homepage = "http://www.elasticsearch.org/guide/en/logstash/current/index.html"
   s.require_paths = ["lib"]

From 2e7743a74d4905012a2327e86de3354d17d21f70 Mon Sep 17 00:00:00 2001
From: Pier-Hugues Pellerin
Date: Thu, 18 Dec 2014 14:45:34 -0500
Subject: [PATCH 011/207] If the `docinfo_target` already exists in the ES
 document we will try to merge it when the field is a hash, and we will
 throw an exception for any other type.

Fixes #6
---
 lib/logstash/inputs/elasticsearch.rb | 93 +++++++++--
 spec/inputs/elasticsearch_spec.rb | 224 +++++++++++++++++++++++----
 2 files changed, 274 insertions(+), 43 deletions(-)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index 1e531996..7fd24e42 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -18,9 +18,14 @@
 #
 # This would create an Elasticsearch query with the following format:
 # [source,json]
-# http://localhost:9200/logstash-*/_search?q='{ "query": { "match": { "statuscode": 200 } } }'&scroll=1m&size=1000
+# curl 'http://localhost:9200/logstash-*/_search?&scroll=1m&size=1000' -d '{
+#   "query": {
+#     "match": {
+#       "statuscode": 200
+#     }
+#   }
+# }'
 #
-# TODO(sissel): Option to keep the index, type, and doc id so we can do reindexing?
class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config_name "elasticsearch" milestone 1 @@ -37,7 +42,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :index, :validate => :string, :default => "logstash-*" # The query to be executed. - config :query, :validate => :string, :default => "*" + config :query, :validate => :string, :default => '{"query": { "match_all": {} } }' # Enable the Elasticsearch "scan" search type. This will disable # sorting but increase speed and performance. @@ -51,6 +56,45 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # round trip (i.e. between the previous scan scroll request, to the next). config :scroll, :validate => :string, :default => "1m" + # If set, include Elasticsearch document information such as index, type, and + # the id in the event. + # + # It might be important to note, with regards to metadata, that if you're + # ingesting documents with the intent to re-index them (or just update them) + # that the `action` option in the elasticsearch output want's to know how to + # handle those things. It can be dynamically assigned with a field + # added to the metadata. + # + # Example + # [source, ruby] + # input { + # elasticsearch { + # host => "es.production.mysite.org" + # index => "mydata-2018.09.*" + # query => "*" + # size => 500 + # scroll => "5m" + # docinfo => true + # } + # } + # output { + # elasticsearch { + # index => "copy-of-production.%{[@metadata][_index]}" + # index_type => "%{[@metadata][_type]}" + # document_id => "%{[@metadata][_id]}" + # } + # } + # + config :docinfo, :validate => :boolean, :default => false + + # Where to move the Elasticsearch document information by default we use the @metadata field. + config :docinfo_target, :validate=> :string, :default => "@metadata" + + # List of document metadata to move to the `docinfo_target` field + # To learn more about Elasticsearch metadata fields read + # http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html + config :docinfo_fields, :validate => :array, :default => ['_index', '_type', '_id'] + # Basic Auth - username config :user, :validate => :string @@ -68,10 +112,10 @@ def register require "elasticsearch" @options = { - index: @index, - body: @query, - scroll: @scroll, - size: @size + :index => @index, + :body => @query, + :scroll => @scroll, + :size => @size } @options[:search_type] = 'scan' if @scan @@ -80,28 +124,27 @@ def register if @user && @password token = Base64.strict_encode64("#{@user}:#{@password.value}") - transport_options[:headers] = { Authorization: "Basic #{token}" } + transport_options[:headers] = { :Authorization => "Basic #{token}" } end hosts = if @ssl then - @hosts.map {|h| { host: h, scheme: 'https' } } + @hosts.map { |h| { :host => h, :scheme => 'https' } } else @hosts end if @ssl && @ca_file - transport_options[:ssl] = { ca_file: @ca_file } + transport_options[:ssl] = { :ca_file => @ca_file } end - @client = Elasticsearch::Client.new hosts: hosts, transport_options: transport_options - - end # def register - + @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) + end + public def run(output_queue) # get first wave of data - r = @client.search @options + r = @client.search(@options) # since 'scan' doesn't return data on the search call, do an extra scroll if @scan @@ -109,8 +152,24 @@ def run(output_queue) end while r['hits']['hits'].any? 
do - r['hits']['hits'].each do |event| + r['hits']['hits'].each do |hit| + event = LogStash::Event.new(hit['_source']) decorate(event) + + if @docinfo + event[@docinfo_target] ||= {} + + unless event[@docinfo_target].is_a?(Hash) + @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the `@metadata` field in the `_source` document, expected a hash got:", :metadata_type => event[@docinfo_target].class) + + raise Exception.new("Elasticsearch input: incompatible event") + end + + @docinfo_fields.each do |field| + event[@docinfo_target][field] = hit[field] + end + end + output_queue << event end r = scroll_request(r['_scroll_id']) @@ -119,6 +178,6 @@ def run(output_queue) private def scroll_request scroll_id - @client.scroll(body: scroll_id, scroll: @scroll) + @client.scroll(:body => scroll_id, :scroll => @scroll) end end # class LogStash::Inputs::Elasticsearch diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index f4006303..f1292494 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1,11 +1,10 @@ +# encoding: utf-8 require "logstash/devutils/rspec/spec_helper" require "logstash/inputs/elasticsearch" require "elasticsearch" describe "inputs/elasticsearch" do - it "should retrieve json event from elasticseach" do - config = %q[ input { elasticsearch { @@ -33,7 +32,7 @@ "_type" => "logs", "_id" => "C5b2xLQwTZa76jBmHIbwHQ", "_score" => 1.0, - "fields" => {"message" => ["ohayo"] } + "_source" => { "message" => ["ohayo"] } } ] } } @@ -46,23 +45,15 @@ client = Elasticsearch::Client.new expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) expect(client).to receive(:search).with(any_args).and_return(response) - expect(client).to receive(:scroll).with({:body=>"cXVlcnlUaGVuRmV0Y2g", :scroll=>"1m"}).and_return(scroll_reponse) - - pipeline = LogStash::Pipeline.new(config) - queue = Queue.new - pipeline.instance_eval do - @output_func = lambda { |event| queue << event } - end - pipeline_thread = Thread.new { pipeline.run } - event = queue.pop + expect(client).to receive(:scroll).with({ :body => "cXVlcnlUaGVuRmV0Y2g", :scroll=> "1m" }).and_return(scroll_reponse) - insist { event["fields"]["message"] } == [ "ohayo" ] + event = fetch_event(config) - pipeline_thread.join + insist { event }.is_a?(LogStash::Event) + insist { event["message"] } == [ "ohayo" ] end it "should retrieve json event from elasticseach with scan" do - config = %q[ input { elasticsearch { @@ -95,7 +86,7 @@ "_type" => "logs", "_id" => "C5b2xLQwTZa76jBmHIbwHQ", "_score" => 1.0, - "fields" => {"message" => ["ohayo"] } + "_source" => { "message" => ["ohayo"] } } ] } }, @@ -108,19 +99,200 @@ client = Elasticsearch::Client.new expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) expect(client).to receive(:search).with(any_args).and_return(scan_response) - expect(client).to receive(:scroll).with({:body=>"DcrY3G1xff6SB", :scroll=>"1m"}).and_return(scroll_responses.first) - expect(client).to receive(:scroll).with({:body=>"cXVlcnlUaGVuRmV0Y2g", :scroll=>"1m"}).and_return(scroll_responses.last) + expect(client).to receive(:scroll).with({ :body => "DcrY3G1xff6SB", :scroll => "1m" }).and_return(scroll_responses.first) + expect(client).to receive(:scroll).with({ :body=> "cXVlcnlUaGVuRmV0Y2g", :scroll => "1m" }).and_return(scroll_responses.last) + + event = fetch_event(config) + + insist { event }.is_a?(LogStash::Event) + insist { event["message"] } == [ "ohayo" ] + end - pipeline = 
LogStash::Pipeline.new(config) - queue = Queue.new - pipeline.instance_eval do - @output_func = lambda { |event| queue << event } + context "with Elasticsearch document information" do + let!(:response) do + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { + "message" => ["ohayo"], + "metadata_with_hash" => { "awesome" => "logstash" }, + "metadata_with_string" => "a string" + } + } ] + } + } end - pipeline_thread = Thread.new { pipeline.run } - event = queue.pop - insist { event["fields"]["message"] } == [ "ohayo" ] + let(:scroll_reponse) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + end + + let(:client) { Elasticsearch::Client.new } + + before do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:search).with(any_args).and_return(response) + allow(client).to receive(:scroll).with({ :body => "cXVlcnlUaGVuRmV0Y2g", :scroll => "1m" }).and_return(scroll_reponse) + end + + context 'when defining docinfo' do + let(:config_metadata) do + %q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + } + } + ] + end + + it 'merges the values if the `docinfo_target` already exist in the `_source` document' do + metadata_field = 'metadata_with_hash' + + config_metadata_with_hash = %Q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_target => '#{metadata_field}' + } + } + ] + + event = fetch_event(config_metadata_with_hash) + + expect(event[metadata_field]["_index"]).to eq('logstash-2014.10.12') + expect(event[metadata_field]["_type"]).to eq('logs') + expect(event[metadata_field]["_id"]).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event[metadata_field]["awesome"]).to eq("logstash") + end + + it 'thows an exception if the `docinfo_target` exist but is not of type hash' do + metadata_field = 'metadata_with_string' + + config_metadata_with_string = %Q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_target => '#{metadata_field}' + } + } + ] + + pipeline = LogStash::Pipeline.new(config_metadata_with_string) + queue = Queue.new + pipeline.instance_eval do + @output_func = lambda { |event| queue << event } + end + + expect { pipeline.run }.to raise_error(Exception, /incompatible event/) + end + + it "should move the document info to the @metadata field" do + event = fetch_event(config_metadata) - pipeline_thread.join + expect(event["[@metadata][_index]"]).to eq('logstash-2014.10.12') + expect(event["[@metadata][_type]"]).to eq('logs') + expect(event["[@metadata][_id]"]).to eq('C5b2xLQwTZa76jBmHIbwHQ') + end + + it 'should move the document information to the specified field' do + config = %q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_target => 'meta' + } + } + ] + event = 
fetch_event(config) + + expect(event["[meta][_index]"]).to eq('logstash-2014.10.12') + expect(event["[meta][_type]"]).to eq('logs') + expect(event["[meta][_id]"]).to eq('C5b2xLQwTZa76jBmHIbwHQ') + end + + it "should allow to specify which fields from the document info to save to the @metadata field" do + fields = ["_index"] + config = %Q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_fields => #{fields} + } + }] + + event = fetch_event(config) + + expect(event["@metadata"].keys).to eq(fields) + expect(event["[@metadata][_type]"]).to eq(nil) + expect(event["[@metadata][_index]"]).to eq('logstash-2014.10.12') + expect(event["[@metadata][_id]"]).to eq(nil) + end + end + + context "when not defining the docinfo" do + it 'should keep the document information in the root of the event' do + config = %q[ + input { + elasticsearch { + hosts => ["node01"] + scan => false + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } + } + ] + event = fetch_event(config) + + expect(event["[@metadata][_index]"]).to eq(nil) + expect(event["[@metadata][_type]"]).to eq(nil) + expect(event["[@metadata][_id]"]).to eq(nil) + end + end end end + +def fetch_event(config) + pipeline = LogStash::Pipeline.new(config) + queue = Queue.new + pipeline.instance_eval do + @output_func = lambda { |event| queue << event } + end + pipeline_thread = Thread.new { pipeline.run } + event = queue.pop + + pipeline_thread.join + + return event +end From 893aac7193c2d76b12b927ecc5495b7d44585d65 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 20 Jan 2015 15:35:49 -0500 Subject: [PATCH 012/207] Updating License to 2015 --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index f8b711d5..8eba2be1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2012-2014 Elasticsearch +Copyright (c) 2012-2015 Elasticsearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From 144f81d2272621fd7a21de38921726b7ac2ea140 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 20 Jan 2015 15:37:09 -0500 Subject: [PATCH 013/207] Removing milestone option --- lib/logstash/inputs/elasticsearch.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 7fd24e42..da5088c0 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -28,7 +28,6 @@ # class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config_name "elasticsearch" - milestone 1 default :codec, "json" From 195da26c49039b8ff8a351e66c4836fdcdda5420 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 20 Jan 2015 15:42:17 -0500 Subject: [PATCH 014/207] New Readme --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..4b24b481 --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# Logstash Plugin + +This is a plugin for [Logstash](https://github.com/elasticsearch/logstash). + +It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. 
+ +## Documentation + +Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elasticsearch.org/guide/en/logstash/current/). + +- For formatting code or config example, you can use the asciidoc `[source,ruby]` directive +- For more asciidoc formatting tips, see the excellent reference here https://github.com/elasticsearch/docs#asciidoc-guide + +## Need Help? + +Need help? Try #logstash on freenode IRC or the logstash-users@googlegroups.com mailing list. + +## Developing + +### 1. Plugin Developement and Testing + +#### Code +- To get started, you'll need JRuby with the Bundler gem installed. + +- Create a new plugin or clone and existing from the GitHub [logstash-plugins](https://github.com/logstash-plugins) organization. + +- Install dependencies +```sh +bundle install +``` + +#### Test + +```sh +bundle exec rspec +``` + +The Logstash code required to run the tests/specs is specified in the `Gemfile` by the line similar to: +```ruby +gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5" +``` +To test against another version or a local Logstash, edit the `Gemfile` to specify an alternative location, for example: +```ruby +gem "logstash", :github => "elasticsearch/logstash", :ref => "master" +``` +```ruby +gem "logstash", :path => "/your/local/logstash" +``` + +Then update your dependencies and run your tests: + +```sh +bundle install +bundle exec rspec +``` + +### 2. Running your unpublished Plugin in Logstash + +#### 2.1 Run in a local Logstash clone + +- Edit Logstash `tools/Gemfile` and add the local plugin path, for example: +```ruby +gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome" +``` +- Update Logstash dependencies +```sh +rake vendor:gems +``` +- Run Logstash with your plugin +```sh +bin/logstash -e 'filter {awesome {}}' +``` +At this point any modifications to the plugin code will be applied to this local Logstash setup. After modifying the plugin, simply rerun Logstash. + +#### 2.2 Run in an installed Logstash + +- Build your plugin gem +```sh +gem build logstash-filter-awesome.gemspec +``` +- Install the plugin from the Logstash home +```sh +bin/plugin install /your/local/plugin/logstash-filter-awesome.gem +``` +- Start Logstash and proceed to test the plugin + +## Contributing + +All contributions are welcome: ideas, patches, documentation, bug reports, complaints, and even something you drew up on a napkin. + +Programming is not a required skill. Whatever you've seen about open source and maintainers or community members saying "send patches or die" - you will not see that here. + +It is more important to me that you are able to contribute. + +For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file. 
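
At this point in the series the reworked plugin exposes the options introduced in patches 004 and 005: a `hosts` array, a JSON `query`, and the optional `ssl`, `ca_file`, `user`, and `password` settings. A minimal sketch of a pipeline wiring them together (the hostname, certificate path, and credentials below are placeholders, not values taken from the patches):

```ruby
input {
  elasticsearch {
    hosts    => ["es01.example.org"]   # placeholder host
    index    => "logstash-*"
    query    => '{ "query": { "match": { "statuscode": 200 } } }'
    scroll   => "1m"
    ssl      => true
    ca_file  => "/path/to/ca.pem"      # placeholder path
    user     => "reader"               # placeholder credentials
    password => "secret"
  }
}
```

With `ssl => true`, patch 004 maps each host to an `https` scheme before building the `Elasticsearch::Client`, and `user`/`password` are sent as a Base64-encoded `Authorization: Basic` header.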
From 02cf4f5a9b237eda9a65dec880ae7ba75ef61c89 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 20 Jan 2015 15:46:46 -0500 Subject: [PATCH 015/207] bump version --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index fd698a1d..c464bea6 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '0.1.1' + s.version = '0.1.2' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" From 4c0e30369a8226865cf14928af07a7faf4e37c12 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 27 Jan 2015 11:02:07 -0500 Subject: [PATCH 016/207] New contributors file --- CONTRIBUTORS | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 CONTRIBUTORS diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 00000000..4ac8fe0a --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,13 @@ +The following is a list of people who have contributed ideas, code, bug +reports, or in general have helped logstash along its way. + +Contributors: +* Colin Surprenant (colinsurprenant) +* João Duarte (jsvd) +* Pier-Hugues Pellerin (ph) +* Richard Pijnenburg (electrical) + +Note: If you've sent us patches, bug reports, or otherwise contributed to +Logstash, and you aren't on the list above and want to be, please let us know +and we'll make sure you're here. Contributions from folks like you are what make +open source awesome. From ecdca648965d1de84b371d3915e519ee8e594495 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 27 Jan 2015 11:51:05 -0500 Subject: [PATCH 017/207] New contributors file, without selfish mode on --- CONTRIBUTORS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 4ac8fe0a..f91ccc76 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -3,9 +3,13 @@ reports, or in general have helped logstash along its way. 
 Contributors:
 * Colin Surprenant (colinsurprenant)
+* Jonathan Van Eenwyk (jdve)
+* Jordan Sissel (jordansissel)
 * João Duarte (jsvd)
+* Kurt Hurtado (kurtado)
 * Pier-Hugues Pellerin (ph)
 * Richard Pijnenburg (electrical)
+* Suyog Rao (suyograo)
 
 Note: If you've sent us patches, bug reports, or otherwise contributed to
 Logstash, and you aren't on the list above and want to be, please let us know
 and we'll make sure you're here. Contributions from folks like you are what make
 open source awesome.

From 8a51204af83dc17b14b04826d62faa08520b40b5 Mon Sep 17 00:00:00 2001
From: Suyog Rao
Date: Tue, 27 Jan 2015 15:15:48 -0800
Subject: [PATCH 018/207] Add rspec tag to tests and use localhost

Fixes #9
---
 spec/inputs/elasticsearch_spec.rb | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index f1292494..cda717c1 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -3,12 +3,12 @@
 require "logstash/inputs/elasticsearch"
 require "elasticsearch"
 
-describe "inputs/elasticsearch" do
+describe "inputs/elasticsearch", :elasticsearch => true do
   it "should retrieve json event from elasticseach" do
     config = %q[
       input {
         elasticsearch {
-          hosts => ["node01"]
+          hosts => ["localhost"]
           scan => false
           query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
         }
@@ -57,7 +57,7 @@
     config = %q[
       input {
         elasticsearch {
-          hosts => ["node01"]
+          hosts => ["localhost"]
          scan => true
           query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
         }
@@ -157,7 +157,7 @@
         %q[
           input {
             elasticsearch {
-              hosts => ["node01"]
+              hosts => ["localhost"]
               scan => false
               query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
               docinfo => true
@@ -172,7 +172,7 @@
         config_metadata_with_hash = %Q[
          input {
            elasticsearch {
-             hosts => ["node01"]
+             hosts => ["localhost"]
             scan => false
             query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
             docinfo => true
@@ -195,7 +195,7 @@
        config_metadata_with_string = %Q[
          input {
            elasticsearch {
-             hosts => ["node01"]
+             hosts => ["localhost"]
             scan => false
             query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
             docinfo => true
@@ -225,7 +225,7 @@
       config = %q[
         input {
           elasticsearch {
-            hosts => ["node01"]
+            hosts => ["localhost"]
            scan => false
            query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
            docinfo => true
@@ -245,7 +245,7 @@
       config = %Q[
         input {
           elasticsearch {
-            hosts => ["node01"]
+            hosts => ["localhost"]
            scan => false
            query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
            docinfo => true
@@ -267,7 +267,7 @@
       config = %q[
         input {
           elasticsearch {
-            hosts => ["node01"]
+            hosts => ["localhost"]
            scan => false
            query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
          }

From 6e180000472b920888ee60639f319c7d76ab153a Mon Sep 17 00:00:00 2001
From: Suyog Rao
Date: Tue, 27 Jan 2015 15:23:46 -0800
Subject: [PATCH 019/207] Bump version to 0.1.3

Fixes #10
---
 logstash-input-elasticsearch.gemspec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index c464bea6..49c139de 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '0.1.2'
+  s.version = '0.1.3'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Read from an
Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" From 85f98e512de960ca9b640bbcb59209890c24ccac Mon Sep 17 00:00:00 2001 From: Pere Urbon-Bayes Date: Thu, 26 Feb 2015 17:53:32 +0100 Subject: [PATCH 020/207] Remove traces and references the meta logstash gem and change it to use the new logstash-core gem. --- Gemfile | 3 +-- README.md | 33 ++++++++++------------------ logstash-input-elasticsearch.gemspec | 4 ++-- 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/Gemfile b/Gemfile index c48c3d1e..d9266971 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,2 @@ source 'https://rubygems.org/' -gemspec -gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5" +gemspec \ No newline at end of file diff --git a/README.md b/README.md index 4b24b481..185c8395 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Need help? Try #logstash on freenode IRC or the logstash-users@googlegroups.com #### Code - To get started, you'll need JRuby with the Bundler gem installed. -- Create a new plugin or clone and existing from the GitHub [logstash-plugins](https://github.com/logstash-plugins) organization. +- Create a new plugin or clone and existing from the GitHub [logstash-plugins](https://github.com/logstash-plugins) organization. We also provide [example plugins](https://github.com/logstash-plugins?query=example). - Install dependencies ```sh @@ -31,26 +31,15 @@ bundle install #### Test -```sh -bundle exec rspec -``` +- Update your dependencies -The Logstash code required to run the tests/specs is specified in the `Gemfile` by the line similar to: -```ruby -gem "logstash", :github => "elasticsearch/logstash", :branch => "1.5" -``` -To test against another version or a local Logstash, edit the `Gemfile` to specify an alternative location, for example: -```ruby -gem "logstash", :github => "elasticsearch/logstash", :ref => "master" -``` -```ruby -gem "logstash", :path => "/your/local/logstash" +```sh +bundle install ``` -Then update your dependencies and run your tests: +- Run tests ```sh -bundle install bundle exec rspec ``` @@ -58,13 +47,13 @@ bundle exec rspec #### 2.1 Run in a local Logstash clone -- Edit Logstash `tools/Gemfile` and add the local plugin path, for example: +- Edit Logstash `Gemfile` and add the local plugin path, for example: ```ruby gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome" ``` -- Update Logstash dependencies +- Install plugin ```sh -rake vendor:gems +bin/plugin install --no-verify ``` - Run Logstash with your plugin ```sh @@ -74,6 +63,8 @@ At this point any modifications to the plugin code will be applied to this local #### 2.2 Run in an installed Logstash +You can use the same **2.1** method to run your plugin in an installed Logstash by editing its `Gemfile` and pointing the `:path` to your local plugin development directory or you can build the gem and install it using: + - Build your plugin gem ```sh gem build logstash-filter-awesome.gemspec @@ -90,6 +81,6 @@ All contributions are welcome: ideas, patches, documentation, bug reports, compl Programming is not a required skill. Whatever you've seen about open source and maintainers or community members saying "send patches or die" - you will not see that here. -It is more important to me that you are able to contribute. 
+It is more important to the community that you are able to contribute. -For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file. +For more information about contributing, see the [CONTRIBUTING](https://github.com/elasticsearch/logstash/blob/master/CONTRIBUTING.md) file. \ No newline at end of file diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 49c139de..8def9d8d 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '0.1.3' + s.version = '0.1.4' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0' + s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0' s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From 91e8877a1088bdc5d4778dc344ae5143145bc954 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Mon, 20 Apr 2015 09:55:31 -0400 Subject: [PATCH 021/207] Updating license and readme file with the elastic rebrand --- LICENSE | 2 +- logstash-input-elasticsearch.gemspec | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/LICENSE b/LICENSE index 8eba2be1..8026afdc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2012-2015 Elasticsearch +Copyright (c) 2012–2015 Elasticsearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 8def9d8d..2c790ea3 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -5,9 +5,9 @@ Gem::Specification.new do |s| s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" - s.authors = ["Elasticsearch"] - s.email = 'info@elasticsearch.com' - s.homepage = "http://www.elasticsearch.org/guide/en/logstash/current/index.html" + s.authors = ["Elastic"] + s.email = 'info@elastic.co' + s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" s.require_paths = ["lib"] # Files From 3ce97e12c1c112a7da5f402bfa4c04285ee454c1 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Mon, 20 Apr 2015 09:57:03 -0400 Subject: [PATCH 022/207] bump version --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 2c790ea3..9bfcccd0 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '0.1.4' + s.version = '0.1.5' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" From 5b132a56dedcedfc92723047709d12abf39dbdb8 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Thu, 4 Jun 2015 13:34:16 -0400 Subject: [PATCH 023/207] add CHANGELOG.md --- CHANGELOG.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..e69de29b From f18a8dafc6eced065c165b793e72d02611ebed64 Mon Sep 17 00:00:00 2001 From: Shaunak Kashyap Date: Thu, 4 Jun 2015 16:00:26 -0700 Subject: [PATCH 024/207] Replacing mailing list with discussion forum link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 185c8395..4cd34d46 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Logstash provides infrastructure to automatically generate documentation for thi ## Need Help? -Need help? Try #logstash on freenode IRC or the logstash-users@googlegroups.com mailing list. +Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/logstash discussion forum. ## Developing From 726b14115334f8173bd591fdb8e831af389a6ab3 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 19 Jun 2015 13:33:38 -0400 Subject: [PATCH 025/207] Adding NOTICE.txt --- NOTICE.TXT | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 NOTICE.TXT diff --git a/NOTICE.TXT b/NOTICE.TXT new file mode 100644 index 00000000..0b8a9475 --- /dev/null +++ b/NOTICE.TXT @@ -0,0 +1,5 @@ +Elasticsearch +Copyright 2012-2015 Elasticsearch + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). 
\ No newline at end of file From b8ae27b2b11ef289952428fefde67b0204087dfb Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 23 Jun 2015 07:27:06 -0400 Subject: [PATCH 026/207] bump to 1.0.0 --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 9bfcccd0..0922940f 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '0.1.5' + s.version = '1.0.0' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" From 876d7988ed51272950985e208b6abb571f797fbb Mon Sep 17 00:00:00 2001 From: Dmitry Kryuchkov Date: Tue, 7 Jul 2015 16:25:22 +1000 Subject: [PATCH 027/207] Fix example to have correct configuration key --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index da5088c0..d39a4401 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -11,7 +11,7 @@ # input { # # Read all documents from Elasticsearch matching the given query # elasticsearch { -# host => "localhost" +# hosts => "localhost" # query => '{ "query": { "match": { "statuscode": 200 } } }' # } # } From 130ee3b8fb4a9bdd7cb974480a5b9eca4093515e Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 20 May 2015 10:22:53 +0100 Subject: [PATCH 028/207] properly handle ports when using ssl --- lib/logstash/inputs/elasticsearch.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index d39a4401..8edad8b4 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -127,7 +127,10 @@ def register end hosts = if @ssl then - @hosts.map { |h| { :host => h, :scheme => 'https' } } + @hosts.map do |h| + host, port = h.split(":") + { :host => host, :scheme => 'https', :port => port } + end else @hosts end From bfbcc44dc8c8b1ea041bfb66c61648f219963f40 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 20 May 2015 09:18:38 +0100 Subject: [PATCH 029/207] remove unused port parameter Fixes #17 --- lib/logstash/inputs/elasticsearch.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 8edad8b4..da5ad860 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -32,11 +32,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base default :codec, "json" # List of elasticsearch hosts to use for querying. + # each host can be either IP, HOST, IP:port or HOST:port + # port defaults to 9200 config :hosts, :validate => :array - # The HTTP port of your Elasticsearch server's REST interface. - config :port, :validate => :number, :default => 9200 - # The index or alias to search. 
  config :index, :validate => :string, :default => "logstash-*"

From 54366ce4f36bc52ac0367312db7b59fd656c65c1 Mon Sep 17 00:00:00 2001
From: Antonio Bonuccelli
Date: Tue, 19 May 2015 14:24:01 +0200
Subject: [PATCH 030/207] Example still referring to 1.4.2

Example still referring to 1.4.2 and not consistent with parameters
reference host => hosts

Fixes #17
---
 lib/logstash/inputs/elasticsearch.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index da5ad860..72d47ad2 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -67,7 +67,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # [source, ruby]
   #     input {
   #       elasticsearch {
-  #         host => "es.production.mysite.org"
+  #         hosts => "es.production.mysite.org"
   #         index => "mydata-2018.09.*"
   #         query => "*"
   #         size => 500

From 60b7de3027fbf3f61aa170e5dbfca9b5a66d0d41 Mon Sep 17 00:00:00 2001
From: Pier-Hugues Pellerin
Date: Wed, 26 Aug 2015 09:15:02 -0400
Subject: [PATCH 031/207] Update README with the correct link to elastic

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 4cd34d46..3a070f93 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,15 @@
 # Logstash Plugin

-This is a plugin for [Logstash](https://github.com/elasticsearch/logstash).
+This is a plugin for [Logstash](https://github.com/elastic/logstash).

 It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.

 ## Documentation

-Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elasticsearch.org/guide/en/logstash/current/).
+Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elastic.co/guide/en/logstash/current/).

 - For formatting code or config example, you can use the asciidoc `[source,ruby]` directive
-- For more asciidoc formatting tips, see the excellent reference here https://github.com/elasticsearch/docs#asciidoc-guide
+- For more asciidoc formatting tips, see the excellent reference here https://github.com/elastic/docs#asciidoc-guide

 ## Need Help?

From 9beba02ab83790a8c0788a21a0cb05e55cc48207 Mon Sep 17 00:00:00 2001
From: Tal Levy
Date: Fri, 28 Aug 2015 15:19:16 -0700
Subject: [PATCH 032/207] refactor request logic into own method.

Main reasoning for this refactor is a lower memory footprint
since response is not assigned to a variable inside of the #run
scope anymore

Fixes #21.
--- lib/logstash/inputs/elasticsearch.rb | 52 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 72d47ad2..c8ba825b 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -140,6 +140,32 @@ def register @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) end + + private + def run_next(output_queue, scroll_id) + r = scroll_request(scroll_id) + r['hits']['hits'].each do |hit| + event = LogStash::Event.new(hit['_source']) + decorate(event) + + if @docinfo + event[@docinfo_target] ||= {} + + unless event[@docinfo_target].is_a?(Hash) + @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the `@metadata` field in the `_source` document, expected a hash got:", :metadata_type => event[@docinfo_target].class) + + raise Exception.new("Elasticsearch input: incompatible event") + end + + @docinfo_fields.each do |field| + event[@docinfo_target][field] = hit[field] + end + end + output_queue << event + end + + {:has_hits => r['hits']['hits'].any?, :scroll_id => r['_scroll_id']} + end public def run(output_queue) @@ -149,31 +175,11 @@ def run(output_queue) # since 'scan' doesn't return data on the search call, do an extra scroll if @scan - r = scroll_request(r['_scroll_id']) + resp = run_next(output_queue, r['_scroll_id']) end - while r['hits']['hits'].any? do - r['hits']['hits'].each do |hit| - event = LogStash::Event.new(hit['_source']) - decorate(event) - - if @docinfo - event[@docinfo_target] ||= {} - - unless event[@docinfo_target].is_a?(Hash) - @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the `@metadata` field in the `_source` document, expected a hash got:", :metadata_type => event[@docinfo_target].class) - - raise Exception.new("Elasticsearch input: incompatible event") - end - - @docinfo_fields.each do |field| - event[@docinfo_target][field] = hit[field] - end - end - - output_queue << event - end - r = scroll_request(r['_scroll_id']) + while resp[:has_hits] do + resp = run_next(output_queue, resp[:scroll_id]) end end # def run From 6b46c1aed188400ab9eff56b3677232b476ccd91 Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Fri, 28 Aug 2015 16:34:12 -0700 Subject: [PATCH 033/207] bump version to 1.0.1 --- CHANGELOG.md | 2 ++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e69de29b..194d3153 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -0,0 +1,2 @@ +## 1.0.1 + - refactor request logic into own method (better memory gc perf) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 0922940f..17803574 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '1.0.0' + s.version = '1.0.1' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" From 9838d47a5dbb8ecd37bbfd4a79a907b5c4268880 Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Mon, 31 Aug 2015 10:57:07 -0700 Subject: [PATCH 034/207] refactor specs and fix scan scroll response handling --- lib/logstash/inputs/elasticsearch.rb | 65 +++++++++++++++------------- spec/inputs/elasticsearch_spec.rb | 44 +++++++++---------- 2 files changed, 58 insertions(+), 51 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index c8ba825b..47160d27 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -141,32 +141,6 @@ def register @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) end - private - def run_next(output_queue, scroll_id) - r = scroll_request(scroll_id) - r['hits']['hits'].each do |hit| - event = LogStash::Event.new(hit['_source']) - decorate(event) - - if @docinfo - event[@docinfo_target] ||= {} - - unless event[@docinfo_target].is_a?(Hash) - @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the `@metadata` field in the `_source` document, expected a hash got:", :metadata_type => event[@docinfo_target].class) - - raise Exception.new("Elasticsearch input: incompatible event") - end - - @docinfo_fields.each do |field| - event[@docinfo_target][field] = hit[field] - end - end - output_queue << event - end - - {:has_hits => r['hits']['hits'].any?, :scroll_id => r['_scroll_id']} - end - public def run(output_queue) @@ -175,14 +149,47 @@ def run(output_queue) # since 'scan' doesn't return data on the search call, do an extra scroll if @scan - resp = run_next(output_queue, r['_scroll_id']) + r = process_next_scroll(output_queue, r['_scroll_id']) + has_hits = r['has_hits'] + else # not a scan, process the response + r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } + has_hits = r['hits']['hits'].any? 
end - while resp[:has_hits] do - resp = run_next(output_queue, resp[:scroll_id]) + while has_hits do + r = process_next_scroll(output_queue, r['_scroll_id']) + has_hits = r['has_hits'] end end # def run + private + def process_next_scroll(output_queue, scroll_id) + r = scroll_request(scroll_id) + r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } + {'has_hits' => r['hits']['hits'].any?, '_scroll_id' => r['_scroll_id']} + end + + private + def push_hit(hit, output_queue) + event = LogStash::Event.new(hit['_source']) + decorate(event) + + if @docinfo + event[@docinfo_target] ||= {} + + unless event[@docinfo_target].is_a?(Hash) + @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the `@metadata` field in the `_source` document, expected a hash got:", :metadata_type => event[@docinfo_target].class) + + raise Exception.new("Elasticsearch input: incompatible event") + end + + @docinfo_fields.each do |field| + event[@docinfo_target][field] = hit[field] + end + end + output_queue << event + end + private def scroll_request scroll_id @client.scroll(:body => scroll_id, :scroll => @scroll) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index cda717c1..8b6f1e72 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -3,7 +3,7 @@ require "logstash/inputs/elasticsearch" require "elasticsearch" -describe "inputs/elasticsearch", :elasticsearch => true do +describe LogStash::Inputs::Elasticsearch do it "should retrieve json event from elasticseach" do config = %q[ input { @@ -47,7 +47,9 @@ expect(client).to receive(:search).with(any_args).and_return(response) expect(client).to receive(:scroll).with({ :body => "cXVlcnlUaGVuRmV0Y2g", :scroll=> "1m" }).and_return(scroll_reponse) - event = fetch_event(config) + event = input(config) do |pipeline, queue| + queue.pop + end insist { event }.is_a?(LogStash::Event) insist { event["message"] } == [ "ohayo" ] @@ -102,7 +104,9 @@ expect(client).to receive(:scroll).with({ :body => "DcrY3G1xff6SB", :scroll => "1m" }).and_return(scroll_responses.first) expect(client).to receive(:scroll).with({ :body=> "cXVlcnlUaGVuRmV0Y2g", :scroll => "1m" }).and_return(scroll_responses.last) - event = fetch_event(config) + event = input(config) do |pipeline, queue| + queue.pop + end insist { event }.is_a?(LogStash::Event) insist { event["message"] } == [ "ohayo" ] @@ -181,7 +185,9 @@ } ] - event = fetch_event(config_metadata_with_hash) + event = input(config_metadata_with_hash) do |pipeline, queue| + queue.pop + end expect(event[metadata_field]["_index"]).to eq('logstash-2014.10.12') expect(event[metadata_field]["_type"]).to eq('logs') @@ -214,7 +220,9 @@ end it "should move the document info to the @metadata field" do - event = fetch_event(config_metadata) + event = input(config_metadata) do |pipeline, queue| + queue.pop + end expect(event["[@metadata][_index]"]).to eq('logstash-2014.10.12') expect(event["[@metadata][_type]"]).to eq('logs') @@ -233,7 +241,9 @@ } } ] - event = fetch_event(config) + event = input(config) do |pipeline, queue| + queue.pop + end expect(event["[meta][_index]"]).to eq('logstash-2014.10.12') expect(event["[meta][_type]"]).to eq('logs') @@ -253,7 +263,9 @@ } }] - event = fetch_event(config) + event = input(config) do |pipeline, queue| + queue.pop + end expect(event["@metadata"].keys).to eq(fields) expect(event["[@metadata][_type]"]).to eq(nil) @@ -273,7 +285,9 @@ } } ] - event = fetch_event(config) + event = input(config) do |pipeline, queue| + queue.pop + 
end expect(event["[@metadata][_index]"]).to eq(nil) expect(event["[@metadata][_type]"]).to eq(nil) @@ -282,17 +296,3 @@ end end end - -def fetch_event(config) - pipeline = LogStash::Pipeline.new(config) - queue = Queue.new - pipeline.instance_eval do - @output_func = lambda { |event| queue << event } - end - pipeline_thread = Thread.new { pipeline.run } - event = queue.pop - - pipeline_thread.join - - return event -end From b56a94f2eb441a81f647e4414c942732e8232a46 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 3 Sep 2015 12:16:24 +0100 Subject: [PATCH 035/207] bumped to 1.0.2 --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 194d3153..19042914 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,2 +1,5 @@ +## 1.0.2 (September 3 - 2015) + - fix scan/scroll response handling + ## 1.0.1 - refactor request logic into own method (better memory gc perf) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 17803574..9ba6d4ac 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '1.0.1' + s.version = '1.0.2' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" From 65e7973d6d0487789faaaef4c1932987a5063760 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Fri, 11 Sep 2015 10:34:48 +0100 Subject: [PATCH 036/207] remove git ls-files from gemspec --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 9ba6d4ac..022b85fc 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -11,7 +11,7 @@ Gem::Specification.new do |s| s.require_paths = ["lib"] # Files - s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*') + s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT'] # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/}) From ef3c28886f8f5d2b9dcb5509570c05cf2417c039 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Tue, 8 Sep 2015 16:21:16 +0100 Subject: [PATCH 037/207] use plugin.stop to return from run Fixes #28 --- lib/logstash/inputs/elasticsearch.rb | 2 +- spec/inputs/elasticsearch_spec.rb | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 47160d27..bb8354e7 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -156,7 +156,7 @@ def run(output_queue) has_hits = r['hits']['hits'].any? end - while has_hits do + while has_hits && !stop? 
r = process_next_scroll(output_queue, r['_scroll_id']) has_hits = r['has_hits'] end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 8b6f1e72..92cffead 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -4,6 +4,25 @@ require "elasticsearch" describe LogStash::Inputs::Elasticsearch do + + it_behaves_like "an interruptible input plugin" do + let(:esclient) { double("elasticsearch-client") } + let(:config) { { } } + + before :each do + allow(Elasticsearch::Client).to receive(:new).and_return(esclient) + hit = { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } + allow(esclient).to receive(:search) { { "hits" => { "hits" => [hit] } } } + allow(esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } } + end + end + it "should retrieve json event from elasticseach" do config = %q[ input { From 8d2ba76fabf84f8c3b5ee1b1269c78c098e08247 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 22 Sep 2015 09:41:06 -0400 Subject: [PATCH 038/207] Update dependencies to logstash-core 2.0 and udate the major version --- logstash-input-elasticsearch.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 022b85fc..4756146b 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '1.0.2' + s.version = '2.0.0' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0' + s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 3.0.0" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From 7de8e554a0f82707b3fccae3b0164dcf635fdb25 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 22 Sep 2015 13:14:37 -0400 Subject: [PATCH 039/207] Declare a dependency on logstash-core snapshot instead of the release version --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 4756146b..68c2f857 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 3.0.0" + s.add_runtime_dependency "logstash-core", "~> 2.0.0.snapshot" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From 761c16dd2cbbad427547fab9df1a9a4f4d8528c5 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Wed, 7 Oct 2015 11:11:22 -0400 Subject: [PATCH 040/207] update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 19042914..46d141f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 2.0.0 + - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully, + instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895 + - Dependency on logstash-core update to 2.0 + ## 1.0.2 (September 3 - 2015) - fix scan/scroll response handling From aaea10230fc48b6cf70dbc2835635776ef694f5c Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Wed, 7 Oct 2015 11:16:13 -0400 Subject: [PATCH 041/207] minor version bump --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 68c2f857..9034968e 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '2.0.0' + s.version = '2.0.1' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. 
This gem is not a stand-alone program" From c67aba2c42a22ab87399ffb6954eee2a90c779e0 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Thu, 8 Oct 2015 09:39:09 -0400 Subject: [PATCH 042/207] dependency logstash-core >= 2.0.0.snapshot < 3.0.0 --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 9034968e..a1803c06 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core", "~> 2.0.0.snapshot" + s.add_runtime_dependency "logstash-core", ">= 2.0.0.snapshot", "< 3.0.0" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From 911cdf9642dd85b0352285be6f7bc7e0d2adc331 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Wed, 14 Oct 2015 14:02:28 -0400 Subject: [PATCH 043/207] dependency logstash-core >= 2.0.0.beta2 < 3.0.0 --- logstash-input-elasticsearch.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index a1803c06..54aca630 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '2.0.1' + s.version = '2.0.2' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core", ">= 2.0.0.snapshot", "< 3.0.0" + s.add_runtime_dependency "logstash-core", ">= 2.0.0.beta2", "< 3.0.0" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From 51411bff0150a5c6942e8eebc7d64f7c04d65a8e Mon Sep 17 00:00:00 2001 From: Pere Urbon-Bayes Date: Thu, 22 Oct 2015 16:05:44 +0200 Subject: [PATCH 044/207] Add a CI badge to show the build status --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 3a070f93..4471346d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # Logstash Plugin +[![Build +Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/) + This is a plugin for [Logstash](https://github.com/elastic/logstash). It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. 
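Patch 045 below rewrites `push_hit` so that the docinfo hash is read from the
event, updated, and written back, rather than mutated in place. A minimal
plain-Ruby sketch of the failure mode it guards against, using a toy
`CopyOnReadEvent` class (a stand-in written for illustration, not the real
`LogStash::Event` API, which is only assumed to be allowed to behave this way):

[source,ruby]
----
# Toy event whose reader returns a defensive copy of hash fields.
class CopyOnReadEvent
  def initialize
    @data = {}
  end

  def [](key)
    value = @data[key]
    value.is_a?(Hash) ? value.dup : value  # hands back a copy, not the original
  end

  def []=(key, value)
    @data[key] = value
  end
end

event = CopyOnReadEvent.new
event["meta"] = {}

# In-place mutation is silently lost: it only changes the returned copy.
event["meta"]["_id"] = "abc"
puts event["meta"].inspect  # => {}

# Read-modify-write, the pattern patch 045 adopts, does persist.
docinfo = event["meta"] || {}
docinfo["_id"] = "abc"
event["meta"] = docinfo
puts event["meta"].inspect  # => {"_id"=>"abc"}
----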
From c0fcf7851e5d09ec2a331f0ee08a616f491c4614 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Mon, 7 Dec 2015 15:13:57 -0500 Subject: [PATCH 045/207] do not assume event field ref to return in-place mutable structure --- lib/logstash/inputs/elasticsearch.rb | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index bb8354e7..5765e122 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -60,7 +60,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # It might be important to note, with regards to metadata, that if you're # ingesting documents with the intent to re-index them (or just update them) # that the `action` option in the elasticsearch output want's to know how to - # handle those things. It can be dynamically assigned with a field + # handle those things. It can be dynamically assigned with a field # added to the metadata. # # Example @@ -86,7 +86,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :docinfo, :validate => :boolean, :default => false # Where to move the Elasticsearch document information by default we use the @metadata field. - config :docinfo_target, :validate=> :string, :default => "@metadata" + config :docinfo_target, :validate=> :string, :default => LogStash::Event::METADATA # List of document metadata to move to the `docinfo_target` field # To learn more about Elasticsearch metadata fields read @@ -105,7 +105,6 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # SSL Certificate Authority file config :ca_file, :validate => :path - public def register require "elasticsearch" @@ -141,9 +140,7 @@ def register @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) end - public def run(output_queue) - # get first wave of data r = @client.search(@options) @@ -160,38 +157,42 @@ def run(output_queue) r = process_next_scroll(output_queue, r['_scroll_id']) has_hits = r['has_hits'] end - end # def run + end private + def process_next_scroll(output_queue, scroll_id) r = scroll_request(scroll_id) r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } {'has_hits' => r['hits']['hits'].any?, '_scroll_id' => r['_scroll_id']} end - private def push_hit(hit, output_queue) event = LogStash::Event.new(hit['_source']) decorate(event) if @docinfo - event[@docinfo_target] ||= {} + # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. + docinfo_target = event[@docinfo_target] || {} - unless event[@docinfo_target].is_a?(Hash) - @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the `@metadata` field in the `_source` document, expected a hash got:", :metadata_type => event[@docinfo_target].class) + unless docinfo_target.is_a?(Hash) + @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the docinfo_target=#{@docinfo_target} field in the `_source` document, expected a hash got:", :docinfo_target_type => docinfo_target.class, :event => event) + # TODO: (colin) I am not sure raising is a good strategy here? 
raise Exception.new("Elasticsearch input: incompatible event") end @docinfo_fields.each do |field| - event[@docinfo_target][field] = hit[field] + docinfo_target[field] = hit[field] end + + event[@docinfo_target] = docinfo_target end + output_queue << event end - private def scroll_request scroll_id @client.scroll(:body => scroll_id, :scroll => @scroll) end -end # class LogStash::Inputs::Elasticsearch +end From a8692e3362bcd3994648b8d9bb66897334720aec Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Thu, 17 Dec 2015 14:17:59 -0500 Subject: [PATCH 046/207] bump version to 2.0.3 --- CHANGELOG.md | 5 ++++- logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46d141f8..bb9cfb5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ +## 2.0.3 + - Refactored field references and cleanups + ## 2.0.0 - - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully, + - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully, instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895 - Dependency on logstash-core update to 2.0 diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 54aca630..312ecd4a 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '2.0.2' + s.version = '2.0.3' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" From 6f348de67b67bf519ba64dcdc8656df1d216e935 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Thu, 21 Jan 2016 17:45:55 -0800 Subject: [PATCH 047/207] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4471346d..8fcf1f93 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ [![Build Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/) +[![Gem Version](https://badge.fury.io/rb/logstash-input-elasticsearch.svg)](https://badge.fury.io/rb/logstash-input-elasticsearch) + This is a plugin for [Logstash](https://github.com/elastic/logstash). It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. @@ -86,4 +88,4 @@ Programming is not a required skill. Whatever you've seen about open source and It is more important to the community that you are able to contribute. -For more information about contributing, see the [CONTRIBUTING](https://github.com/elastic/logstash/blob/master/CONTRIBUTING.md) file. \ No newline at end of file +For more information about contributing, see the [CONTRIBUTING](https://github.com/elastic/logstash/blob/master/CONTRIBUTING.md) file. 
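The run/scroll machinery that patches 032, 034, and 045 above converge on
boils down to a single `search` call followed by repeated `scroll` calls until
a page comes back empty. A condensed sketch of that loop, using the same
elasticsearch-ruby calls the plugin itself makes; the host, index, query, and
page size below are illustrative placeholders:

[source,ruby]
----
require "elasticsearch"  # elasticsearch-ruby, the client the plugin depends on

# Placeholder connection details; point this at your own cluster.
client = Elasticsearch::Client.new(:hosts => ["localhost:9200"])

options = {
  :index  => "logstash-*",
  :body   => '{ "query": { "match_all": {} } }',
  :scroll => "1m",
  :size   => 1000
}

r = client.search(options)
while r["hits"]["hits"].any?
  r["hits"]["hits"].each { |hit| puts hit["_source"] }
  # Each scroll call returns the next page; always pass the newest scroll id.
  r = client.scroll(:body => r["_scroll_id"], :scroll => "1m")
end
----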
From 454be5b8acdbded87893cba40c7bb97367d56434 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Thu, 21 Jan 2016 17:47:10 -0800 Subject: [PATCH 048/207] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 8fcf1f93..82994c4c 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ [![Build Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/) - -[![Gem Version](https://badge.fury.io/rb/logstash-input-elasticsearch.svg)](https://badge.fury.io/rb/logstash-input-elasticsearch) +[![Gem info][badge-gem]][rubygems] This is a plugin for [Logstash](https://github.com/elastic/logstash). From e2f624bcfbceb19fa8a95bb7b3e6c573a3439477 Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Thu, 21 Jan 2016 17:47:48 -0800 Subject: [PATCH 049/207] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 82994c4c..b2c8a711 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Logstash Plugin +[![Gem info][badge-gem]][rubygems] [![Build Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/) -[![Gem info][badge-gem]][rubygems] This is a plugin for [Logstash](https://github.com/elastic/logstash). From 2e1fc9f070ea8f0336cd55278d8924b0475f659b Mon Sep 17 00:00:00 2001 From: Suyog Rao Date: Thu, 21 Jan 2016 17:48:25 -0800 Subject: [PATCH 050/207] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b2c8a711..bb921caf 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Logstash Plugin -[![Gem info][badge-gem]][rubygems] +[![Gem Version](https://badge.fury.io/rb/logstash-input-elasticsearch.svg)](https://badge.fury.io/rb/logstash-input-elasticsearch) [![Build Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/) From e87c21232261d68fd34c9e68555282206097995d Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 18 Mar 2016 14:23:55 -0400 Subject: [PATCH 051/207] dependency logstash-core > 2.0.0 < 6.0.0.alpha1 --- CHANGELOG.md | 2 ++ logstash-input-elasticsearch.gemspec | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb9cfb5c..f03ab6f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +# 2.0.4 + - New dependency requirements for logstash-core for the 5.0 release ## 2.0.3 - Refactored field references and cleanups diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 312ecd4a..4fcb31cf 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '2.0.3' + s.version = '2.0.4' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using 
$LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core", ">= 2.0.0.beta2", "< 3.0.0" + s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 6.0.0.alpha1" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From d953db61039a63949425c75cdde8471973e45f2b Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Wed, 23 Mar 2016 21:36:43 -0400 Subject: [PATCH 052/207] dependency logstash-core-plugin-api ~> 1.0 --- CHANGELOG.md | 2 ++ logstash-input-elasticsearch.gemspec | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f03ab6f8..634897de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +# 2.0.5 + - Depend on logstash-core-plugin-api instead of logstash-core, removing the need to mass update plugins on major releases of logstash # 2.0.4 - New dependency requirements for logstash-core for the 5.0 release ## 2.0.3 diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 4fcb31cf..ea6c6a18 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '2.0.4' + s.version = '2.0.5' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core", ">= 2.0.0", "< 6.0.0.alpha1" + s.add_runtime_dependency "logstash-core-plugin-api", "~> 1.0" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From b3fcb68770682353ec92fdc3d3be5ad2e9f5219c Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 8 Apr 2016 14:38:16 -0400 Subject: [PATCH 053/207] adding .travis.yml --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..350c4ebc --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +sudo: false +language: ruby +cache: bundler +rvm: + - jruby-1.7.23 +script: + - bundle exec rspec spec From c36a0cf36b1e922ebe9ef9cacbfd16d3559fc12a Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 8 Apr 2016 15:20:39 -0400 Subject: [PATCH 054/207] adding travis badge --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index bb921caf..da791f24 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,7 @@ # Logstash Plugin [![Gem Version](https://badge.fury.io/rb/logstash-input-elasticsearch.svg)](https://badge.fury.io/rb/logstash-input-elasticsearch) -[![Build -Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-elasticsearch-unit/) +[![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-elasticsearch.svg)](https://travis-ci.org/logstash-plugins/logstash-input-elasticsearch) This is a plugin 
for [Logstash](https://github.com/elastic/logstash). From 9aa37281b6da20a6ee16d6dd8976e1253e56413e Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Wed, 20 Apr 2016 09:42:08 -0400 Subject: [PATCH 055/207] License 2016 and new plugin installing command --- .github/CONTRIBUTING.md | 65 ++++++++++++++++++++++++++++ .github/ISSUE_TEMPLATE.md | 9 ++++ .github/PULL_REQUEST_TEMPLATE.md | 1 + LICENSE | 2 +- README.md | 12 ++++- logstash-input-elasticsearch.gemspec | 2 +- 6 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 .github/CONTRIBUTING.md create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..d3253288 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,65 @@ +# Contributing to Logstash + +All contributions are welcome: ideas, patches, documentation, bug reports, +complaints, etc! + +Programming is not a required skill, and there are many ways to help out! +It is more important to us that you are able to contribute. + +That said, some basic guidelines, which you are free to ignore :) + +## Want to learn? + +Want to lurk about and see what others are doing with Logstash? + +* The irc channel (#logstash on irc.freenode.org) is a good place for this +* The [forum](https://discuss.elastic.co/c/logstash) is also + great for learning from others. + +## Got Questions? + +Have a problem you want Logstash to solve for you? + +* You can ask a question in the [forum](https://discuss.elastic.co/c/logstash) +* Alternately, you are welcome to join the IRC channel #logstash on +irc.freenode.org and ask for help there! + +## Have an Idea or Feature Request? + +* File a ticket on [GitHub](https://github.com/elastic/logstash/issues). Please remember that GitHub is used only for issues and feature requests. If you have a general question, the [forum](https://discuss.elastic.co/c/logstash) or IRC would be the best place to ask. + +## Something Not Working? Found a Bug? + +If you think you found a bug, it probably is a bug. + +* If it is a general Logstash or a pipeline issue, file it in [Logstash GitHub](https://github.com/elasticsearch/logstash/issues) +* If it is specific to a plugin, please file it in the respective repository under [logstash-plugins](https://github.com/logstash-plugins) +* or ask the [forum](https://discuss.elastic.co/c/logstash). + +# Contributing Documentation and Code Changes + +If you have a bugfix or new feature that you would like to contribute to +logstash, and you think it will take more than a few minutes to produce the fix +(ie; write code), it is worth discussing the change with the Logstash users and developers first! You can reach us via [GitHub](https://github.com/elastic/logstash/issues), the [forum](https://discuss.elastic.co/c/logstash), or via IRC (#logstash on freenode irc) +Please note that Pull Requests without tests will not be merged. If you would like to contribute but do not have experience with writing tests, please ping us on IRC/forum or create a PR and ask our help. + +## Contributing to plugins + +Check our [documentation](https://www.elastic.co/guide/en/logstash/current/contributing-to-logstash.html) on how to contribute to plugins or write your own! It is super easy! + +## Contribution Steps + +1. Test your changes! [Run](https://github.com/elastic/logstash#testing) the test suite +2. 
Please make sure you have signed our [Contributor License + Agreement](https://www.elastic.co/contributor-agreement/). We are not + asking you to assign copyright to us, but to give us the right to distribute + your code without restriction. We ask this of all contributors in order to + assure our users of the origin and continuing existence of the code. You + only need to sign the CLA once. +3. Send a pull request! Push your changes to your fork of the repository and + [submit a pull + request](https://help.github.com/articles/using-pull-requests). In the pull + request, describe what your changes do and mention any bugs/issues related + to the pull request. + + diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..c3cc91df --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,9 @@ +Please post all product and debugging questions on our [forum](https://discuss.elastic.co/c/logstash). Your questions will reach our wider community members there, and if we confirm that there is a bug, then we can open a new issue here. + +For all general issues, please provide the following details for fast resolution: + +- Version: +- Operating System: +- Config File (if you have sensitive info, please remove it): +- Sample Data: +- Steps to Reproduce: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..a1538275 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1 @@ +Thanks for contributing to Logstash! If you haven't already signed our CLA, here's a handy link: https://www.elastic.co/contributor-agreement/ diff --git a/LICENSE b/LICENSE index 8026afdc..43976b73 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2012–2015 Elasticsearch +Copyright (c) 2012–2016 Elasticsearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index da791f24..6eb7923c 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,12 @@ gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome" ``` - Install plugin ```sh +# Logstash 2.3 and higher +bin/logstah-plugin install --no-verify + +# Prior to Logstash 2.3 bin/plugin install --no-verify + ``` - Run Logstash with your plugin ```sh @@ -74,7 +79,12 @@ gem build logstash-filter-awesome.gemspec ``` - Install the plugin from the Logstash home ```sh -bin/plugin install /your/local/plugin/logstash-filter-awesome.gem +# Logstash 2.3 and higher +bin/logstah-plugin install --no-verify + +# Prior to Logstash 2.3 +bin/plugin install --no-verify + ``` - Start Logstash and proceed to test the plugin diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index ea6c6a18..d63b142c 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -4,7 +4,7 @@ Gem::Specification.new do |s| s.version = '2.0.5' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" - s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" + s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" s.authors = ["Elastic"] s.email = 'info@elastic.co' s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" From 07b3d04058df02862dacfc1ac550e39343e9680a Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Wed, 20 Apr 2016 13:59:13 -0400 Subject: [PATCH 056/207] Fix a typo in the command --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6eb7923c..4551c4ed 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome" - Install plugin ```sh # Logstash 2.3 and higher -bin/logstah-plugin install --no-verify +bin/logstash-plugin install --no-verify # Prior to Logstash 2.3 bin/plugin install --no-verify @@ -80,7 +80,7 @@ gem build logstash-filter-awesome.gemspec - Install the plugin from the Logstash home ```sh # Logstash 2.3 and higher -bin/logstah-plugin install --no-verify +bin/logstash-plugin install --no-verify # Prior to Logstash 2.3 bin/plugin install --no-verify From 50b4d296112eab5601bfaf4ed0c8dc8cbeff63ea Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 3 May 2016 09:07:57 -0400 Subject: [PATCH 057/207] Update plugin to the v2.0 api --- .travis.yml | 10 +++++--- Gemfile | 7 ++++- lib/logstash/inputs/elasticsearch.rb | 4 +-- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 38 ++++++++++++++-------------- 5 files changed, 35 insertions(+), 26 deletions(-) diff --git a/.travis.yml b/.travis.yml index 350c4ebc..6b858e57 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,11 @@ +--- sudo: false language: ruby cache: bundler rvm: - - jruby-1.7.23 -script: - - bundle exec rspec spec +- jruby-1.7.25 +script: +- bundle exec rspec spec +jdk: oraclejdk8 +before_install: +- git clone -b feature/event_interface https://github.com/elastic/logstash diff --git a/Gemfile b/Gemfile index d9266971..d97f41d1 100644 --- a/Gemfile +++ b/Gemfile @@ -1,2 +1,7 @@ source 'https://rubygems.org/' -gemspec \ No newline at end of file +gemspec +gem "logstash-core", :path => "./logstash/logstash-core" +gem "logstash-core-plugin-api", :path => "./logstash/logstash-core-plugin-api" +gem "logstash-core-event-java", :path => "./logstash/logstash-core-event-java" +gem "logstash-devutils", :github => "elastic/logstash-devutils", :branch => "feature/plugin-api-2_0" +gem "logstash-codec-json", :github => "logstash-plugins/logstash-codec-json", :branch => "feature/plugin-api-2_0" diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 5765e122..81c61e36 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -173,7 +173,7 @@ def push_hit(hit, output_queue) if @docinfo # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. 
- docinfo_target = event[@docinfo_target] || {} + docinfo_target = event.get(@docinfo_target) || {} unless docinfo_target.is_a?(Hash) @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the docinfo_target=#{@docinfo_target} field in the `_source` document, expected a hash got:", :docinfo_target_type => docinfo_target.class, :event => event) @@ -186,7 +186,7 @@ def push_hit(hit, output_queue) docinfo_target[field] = hit[field] end - event[@docinfo_target] = docinfo_target + event.set(@docinfo_target, docinfo_target) end output_queue << event diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d63b142c..0ee2e2a9 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core-plugin-api", "~> 1.0" + s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 92cffead..dac56c76 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -71,7 +71,7 @@ end insist { event }.is_a?(LogStash::Event) - insist { event["message"] } == [ "ohayo" ] + insist { event.get("message") } == [ "ohayo" ] end it "should retrieve json event from elasticseach with scan" do @@ -128,7 +128,7 @@ end insist { event }.is_a?(LogStash::Event) - insist { event["message"] } == [ "ohayo" ] + insist { event.get("message") } == [ "ohayo" ] end context "with Elasticsearch document information" do @@ -208,10 +208,10 @@ queue.pop end - expect(event[metadata_field]["_index"]).to eq('logstash-2014.10.12') - expect(event[metadata_field]["_type"]).to eq('logs') - expect(event[metadata_field]["_id"]).to eq('C5b2xLQwTZa76jBmHIbwHQ') - expect(event[metadata_field]["awesome"]).to eq("logstash") + expect(event.get("[#{metadata_field}][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[#{metadata_field}][_type]")).to eq('logs') + expect(event.get("[#{metadata_field}][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event.get("[#{metadata_field}][awesome]")).to eq("logstash") end it 'thows an exception if the `docinfo_target` exist but is not of type hash' do @@ -243,9 +243,9 @@ queue.pop end - expect(event["[@metadata][_index]"]).to eq('logstash-2014.10.12') - expect(event["[@metadata][_type]"]).to eq('logs') - expect(event["[@metadata][_id]"]).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[@metadata][_type]")).to eq('logs') + expect(event.get("[@metadata][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') end it 'should move the document information to the specified field' do @@ -264,9 +264,9 @@ queue.pop end - expect(event["[meta][_index]"]).to eq('logstash-2014.10.12') - expect(event["[meta][_type]"]).to eq('logs') - expect(event["[meta][_id]"]).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event.get("[meta][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[meta][_type]")).to eq('logs') + expect(event.get("[meta][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') end it "should allow to specify which fields from the document info to save to the @metadata field" do @@ -286,10 +286,10 @@ queue.pop end - expect(event["@metadata"].keys).to eq(fields) - expect(event["[@metadata][_type]"]).to eq(nil) - 
expect(event["[@metadata][_index]"]).to eq('logstash-2014.10.12') - expect(event["[@metadata][_id]"]).to eq(nil) + expect(event.get("@metadata").keys).to eq(fields) + expect(event.get("[@metadata][_type]")).to eq(nil) + expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[@metadata][_id]")).to eq(nil) end end @@ -308,9 +308,9 @@ queue.pop end - expect(event["[@metadata][_index]"]).to eq(nil) - expect(event["[@metadata][_type]"]).to eq(nil) - expect(event["[@metadata][_id]"]).to eq(nil) + expect(event.get("[@metadata][_index]")).to eq(nil) + expect(event.get("[@metadata][_type]")).to eq(nil) + expect(event.get("[@metadata][_id]")).to eq(nil) end end end From 1344e611ad005d0a71ea0be95ef1856a65f4ef72 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 6 May 2016 14:53:04 -0400 Subject: [PATCH 058/207] Mass update for 2.0 plugin api changes --- .travis.yml | 3 +-- CHANGELOG.md | 2 ++ Gemfile | 7 ++----- logstash-input-elasticsearch.gemspec | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6b858e57..5c63af6e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,4 @@ rvm: script: - bundle exec rspec spec jdk: oraclejdk8 -before_install: -- git clone -b feature/event_interface https://github.com/elastic/logstash +before_install: [] diff --git a/CHANGELOG.md b/CHANGELOG.md index 634897de..603639fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +## 3.0.0 + - Update the plugin to the version 2.0 of the plugin api, this change is required for Logstash 5.0 compatibility. See https://github.com/elastic/logstash/issues/5141 # 2.0.5 - Depend on logstash-core-plugin-api instead of logstash-core, removing the need to mass update plugins on major releases of logstash # 2.0.4 diff --git a/Gemfile b/Gemfile index d97f41d1..2b03d18e 100644 --- a/Gemfile +++ b/Gemfile @@ -1,7 +1,4 @@ source 'https://rubygems.org/' + +# Specify your gem's dependencies in logstash-mass_effect.gemspec gemspec -gem "logstash-core", :path => "./logstash/logstash-core" -gem "logstash-core-plugin-api", :path => "./logstash/logstash-core-plugin-api" -gem "logstash-core-event-java", :path => "./logstash/logstash-core-event-java" -gem "logstash-devutils", :github => "elastic/logstash-devutils", :branch => "feature/plugin-api-2_0" -gem "logstash-codec-json", :github => "logstash-plugins/logstash-codec-json", :branch => "feature/plugin-api-2_0" diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 0ee2e2a9..9c01535f 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '2.0.5' + s.version = '3.0.0' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From 1f1495987061bc1053f2c74b5d82585d1f0e0e6a Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 6 May 2016 16:39:14 -0400 Subject: [PATCH 059/207] Make sure all the gems were published under jruby --- CHANGELOG.md | 2 ++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 603639fd..797ecbe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,5 @@ +## 3.0.1 + - Republish all the gems under jruby. ## 3.0.0 - Update the plugin to the version 2.0 of the plugin api, this change is required for Logstash 5.0 compatibility. See https://github.com/elastic/logstash/issues/5141 # 2.0.5 diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 9c01535f..7c92688a 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '3.0.0' + s.version = '3.0.1' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 8d833bb779ac033e8c4195347a074699d9cd5f49 Mon Sep 17 00:00:00 2001 From: markwalkom Date: Thu, 26 May 2016 13:48:52 +1000 Subject: [PATCH 060/207] Update elasticsearch.rb Fixes #45 --- lib/logstash/inputs/elasticsearch.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 81c61e36..19d0d8b9 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -39,7 +39,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # The index or alias to search. config :index, :validate => :string, :default => "logstash-*" - # The query to be executed. + # The query to be executed. Read the Elasticsearch query DSL documentation + # for more info + # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html config :query, :validate => :string, :default => '{"query": { "match_all": {} } }' # Enable the Elasticsearch "scan" search type. This will disable From 648581fc46884a8f44f729300368a201525c089c Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 14 Jul 2016 12:25:20 +0100 Subject: [PATCH 061/207] dependency logstash-core-plugin-api >= 1.60 <= 2.99 --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 797ecbe2..95c8a18c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 3.0.2 + - Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99 + ## 3.0.1 - Republish all the gems under jruby. 
## 3.0.0 diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 7c92688a..d98b648c 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '3.0.1' + s.version = '3.0.2' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -20,7 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-core-plugin-api", "~> 2.0" + s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] From 732cb2696890c4dd57f473f1acc648375e9daf77 Mon Sep 17 00:00:00 2001 From: ericamick Date: Tue, 19 Apr 2016 09:10:47 -0400 Subject: [PATCH 062/207] Update elasticsearch.rb Fixes #43 --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 19d0d8b9..c758f999 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -61,7 +61,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # # It might be important to note, with regards to metadata, that if you're # ingesting documents with the intent to re-index them (or just update them) - # that the `action` option in the elasticsearch output want's to know how to + # that the `action` option in the elasticsearch output wants to know how to # handle those things. It can be dynamically assigned with a field # added to the metadata. # From f9e080b7ed8d0681ea4f588e32ab9ccea97247cb Mon Sep 17 00:00:00 2001 From: Aaron Mildenstein Date: Thu, 13 Oct 2016 12:33:20 -0600 Subject: [PATCH 063/207] Remove scan option Use `{ "sort": [ "_doc" ] }` instead. Clean up the previous stuff. fixes #50 Fixes #51 --- CHANGELOG.md | 8 ++++ lib/logstash/inputs/elasticsearch.rb | 27 ++++------- logstash-input-elasticsearch.gemspec | 3 +- spec/inputs/elasticsearch_spec.rb | 68 +--------------------------- 4 files changed, 19 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95c8a18c..fafa5ca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 4.0.0 + - Remove `scan` from list of options as this is no longer allowed in + Elasticsearch 5.0. + - Change default query to sort by \_doc, as this replicates the `scan` + behavior + - Improve documentation to show sort by \_doc, and how to add it to custom + queries. 
+ ## 3.0.2 - Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99 diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index c758f999..aaeb7665 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -12,7 +12,7 @@ # # Read all documents from Elasticsearch matching the given query # elasticsearch { # hosts => "localhost" -# query => '{ "query": { "match": { "statuscode": 200 } } }' +# query => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }' # } # } # @@ -23,7 +23,8 @@ # "match": { # "statuscode": 200 # } -# } +# }, +# "sort": [ "_doc" ] # }' # class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base @@ -40,20 +41,16 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :index, :validate => :string, :default => "logstash-*" # The query to be executed. Read the Elasticsearch query DSL documentation - # for more info + # for more info # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html - config :query, :validate => :string, :default => '{"query": { "match_all": {} } }' - - # Enable the Elasticsearch "scan" search type. This will disable - # sorting but increase speed and performance. - config :scan, :validate => :boolean, :default => true + config :query, :validate => :string, :default => '{ "sort": [ "_doc" ] }' # This allows you to set the maximum number of hits returned per scroll. config :size, :validate => :number, :default => 1000 # This parameter controls the keepalive time in seconds of the scrolling # request and initiates the scrolling process. The timeout applies per - # round trip (i.e. between the previous scan scroll request, to the next). + # round trip (i.e. between the previous scroll request, to the next). config :scroll, :validate => :string, :default => "1m" # If set, include Elasticsearch document information such as index, type, and @@ -117,8 +114,6 @@ def register :size => @size } - @options[:search_type] = 'scan' if @scan - transport_options = {} if @user && @password @@ -146,14 +141,8 @@ def run(output_queue) # get first wave of data r = @client.search(@options) - # since 'scan' doesn't return data on the search call, do an extra scroll - if @scan - r = process_next_scroll(output_queue, r['_scroll_id']) - has_hits = r['has_hits'] - else # not a scan, process the response - r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } - has_hits = r['hits']['hits'].any? - end + r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } + has_hits = r['hits']['hits'].any? while has_hits && !stop? r = process_next_scroll(output_queue, r['_scroll_id']) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d98b648c..7640b3a5 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '3.0.2' + s.version = '4.0.0' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -28,4 +28,3 @@ Gem::Specification.new do |s| s.add_development_dependency 'logstash-devutils' end - diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index dac56c76..cf8a97f6 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -28,7 +28,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' } } @@ -74,63 +73,6 @@ insist { event.get("message") } == [ "ohayo" ] end - it "should retrieve json event from elasticseach with scan" do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - scan => true - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - } - } - ] - - scan_response = { - "_scroll_id" => "DcrY3G1xff6SB", - } - - scroll_responses = [ - { - "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", - "took" => 27, - "timed_out" => false, - "_shards" => { - "total" => 169, - "successful" => 169, - "failed" => 0 - }, - "hits" => { - "total" => 1, - "max_score" => 1.0, - "hits" => [ { - "_index" => "logstash-2014.10.12", - "_type" => "logs", - "_id" => "C5b2xLQwTZa76jBmHIbwHQ", - "_score" => 1.0, - "_source" => { "message" => ["ohayo"] } - } ] - } - }, - { - "_scroll_id" => "r453Wc1jh0caLJhSDg", - "hits" => { "hits" => [] } - } - ] - - client = Elasticsearch::Client.new - expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) - expect(client).to receive(:search).with(any_args).and_return(scan_response) - expect(client).to receive(:scroll).with({ :body => "DcrY3G1xff6SB", :scroll => "1m" }).and_return(scroll_responses.first) - expect(client).to receive(:scroll).with({ :body=> "cXVlcnlUaGVuRmV0Y2g", :scroll => "1m" }).and_return(scroll_responses.last) - - event = input(config) do |pipeline, queue| - queue.pop - end - - insist { event }.is_a?(LogStash::Event) - insist { event.get("message") } == [ "ohayo" ] - end - context "with Elasticsearch document information" do let!(:response) do { @@ -150,7 +92,7 @@ "_type" => "logs", "_id" => "C5b2xLQwTZa76jBmHIbwHQ", "_score" => 1.0, - "_source" => { + "_source" => { "message" => ["ohayo"], "metadata_with_hash" => { "awesome" => "logstash" }, "metadata_with_string" => "a string" @@ -181,7 +123,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' docinfo => true } @@ -196,7 +137,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' docinfo => true docinfo_target => '#{metadata_field}' @@ -213,7 +153,7 @@ expect(event.get("[#{metadata_field}][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') expect(event.get("[#{metadata_field}][awesome]")).to eq("logstash") end - + it 'thows an exception if the `docinfo_target` exist but is not of type hash' do metadata_field = 'metadata_with_string' @@ -221,7 +161,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' docinfo => true docinfo_target => '#{metadata_field}' @@ -253,7 +192,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' docinfo => true docinfo_target => 'meta' @@ -275,7 +213,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false 
query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' docinfo => true docinfo_fields => #{fields} @@ -299,7 +236,6 @@ input { elasticsearch { hosts => ["localhost"] - scan => false query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' } } From 3affba91634155ba74d7eeac559c7eddd9269b87 Mon Sep 17 00:00:00 2001 From: Peter Dyson Date: Thu, 24 Nov 2016 15:22:23 +1000 Subject: [PATCH 064/207] Update to docs for SSL ca_file option Make the expected file format and requirement for any chain certificates clearer. --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index aaeb7665..b1676094 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -101,7 +101,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # SSL config :ssl, :validate => :boolean, :default => false - # SSL Certificate Authority file + # SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary config :ca_file, :validate => :path def register From 611e74cb18bc1a1b1d7b9ca51cf419ed1175b485 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Wed, 1 Feb 2017 13:17:52 -0600 Subject: [PATCH 065/207] Use manticore client Fixes #54 --- CHANGELOG.md | 4 ++++ lib/logstash/inputs/elasticsearch.rb | 22 +++++++++++++++------- logstash-input-elasticsearch.gemspec | 6 ++++-- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fafa5ca3..a116b3e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 4.0.1 + - Switch internal HTTP client to support TLSv1.2 + - Upgrade ES client internally to better support ES 5.x + ## 4.0.0 - Remove `scan` from list of options as this is no longer allowed in Elasticsearch 5.0. diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index b1676094..a43ff48d 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -1,6 +1,8 @@ # encoding: utf-8 require "logstash/inputs/base" require "logstash/namespace" +require 'elasticsearch' +require 'elasticsearch/transport/transport/http/manticore' require "base64" # Read from an Elasticsearch cluster, based on search query results. 
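
For context, the SSL and basic-auth plumbing rewired in the hunk that follows
is driven by plugin options that already exist in this file. A minimal
configuration exercising those options might look like the sketch below; the
host, certificate path, and credentials are illustrative placeholders rather
than values taken from any commit:

  input {
    elasticsearch {
      hosts    => ["es.example.org"]
      ssl      => true
      ca_file  => "/path/to/ca.pem"
      user     => "logstash"
      password => "changeme"
    }
  }
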
@@ -105,8 +107,6 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :ca_file, :validate => :path def register - require "elasticsearch" - @options = { :index => @index, :body => @query, @@ -117,8 +117,10 @@ def register transport_options = {} if @user && @password - token = Base64.strict_encode64("#{@user}:#{@password.value}") - transport_options[:headers] = { :Authorization => "Basic #{token}" } + transport_options[:auth] = { + :user => @user, + :password => @password.value + } end hosts = if @ssl then @@ -130,11 +132,17 @@ def register @hosts end + client_options = { + :hosts => hosts, + :transport_options => transport_options, + :transport_class => Elasticsearch::Transport::Transport::HTTP::Manticore + } + if @ssl && @ca_file - transport_options[:ssl] = { :ca_file => @ca_file } + client_options[:ssl] = { :enabled => true, :ca_file => @ca_file } end - - @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) + + @client = Elasticsearch::Client.new(client_options) end def run(output_queue) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 7640b3a5..de82754d 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.0.0' + s.version = '4.0.1' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -22,7 +22,9 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" - s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] + s.add_runtime_dependency 'elasticsearch', ['>= 5.0.0', '< 6.0.0'] + s.add_runtime_dependency "manticore", '>= 0.5.4', '< 1.0.0' + s.add_runtime_dependency 'logstash-codec-json' From d5271f999ac9301fa319c89c7823a926b7b3da2d Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 7 Feb 2017 09:53:06 -0600 Subject: [PATCH 066/207] Revert "Use manticore client" we didn't need to make that change after all This reverts commit 611e74cb18bc1a1b1d7b9ca51cf419ed1175b485. Fixes #57 --- CHANGELOG.md | 4 ---- lib/logstash/inputs/elasticsearch.rb | 22 +++++++--------------- logstash-input-elasticsearch.gemspec | 6 ++---- 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a116b3e5..fafa5ca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,3 @@ -## 4.0.1 - - Switch internal HTTP client to support TLSv1.2 - - Upgrade ES client internally to better support ES 5.x - ## 4.0.0 - Remove `scan` from list of options as this is no longer allowed in Elasticsearch 5.0. diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index a43ff48d..b1676094 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -1,8 +1,6 @@ # encoding: utf-8 require "logstash/inputs/base" require "logstash/namespace" -require 'elasticsearch' -require 'elasticsearch/transport/transport/http/manticore' require "base64" # Read from an Elasticsearch cluster, based on search query results. 
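
The hunk that follows restores the plain Base64 Authorization header in place
of Manticore's auth option. For reference, that header is simply the Base64
encoding of "user:password"; a runnable Ruby sketch with placeholder
credentials:

  require "base64"

  token = Base64.strict_encode64("logstash:changeme") # placeholder credentials
  # token == "bG9nc3Rhc2g6Y2hhbmdlbWU="
  headers = { :Authorization => "Basic #{token}" }
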
@@ -107,6 +105,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :ca_file, :validate => :path def register + require "elasticsearch" + @options = { :index => @index, :body => @query, @@ -117,10 +117,8 @@ def register transport_options = {} if @user && @password - transport_options[:auth] = { - :user => @user, - :password => @password.value - } + token = Base64.strict_encode64("#{@user}:#{@password.value}") + transport_options[:headers] = { :Authorization => "Basic #{token}" } end hosts = if @ssl then @@ -132,17 +130,11 @@ def register @hosts end - client_options = { - :hosts => hosts, - :transport_options => transport_options, - :transport_class => Elasticsearch::Transport::Transport::HTTP::Manticore - } - if @ssl && @ca_file - client_options[:ssl] = { :enabled => true, :ca_file => @ca_file } + transport_options[:ssl] = { :ca_file => @ca_file } end - - @client = Elasticsearch::Client.new(client_options) + + @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) end def run(output_queue) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index de82754d..7640b3a5 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.0.1' + s.version = '4.0.0' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -22,9 +22,7 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" - s.add_runtime_dependency 'elasticsearch', ['>= 5.0.0', '< 6.0.0'] - s.add_runtime_dependency "manticore", '>= 0.5.4', '< 1.0.0' - + s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0'] s.add_runtime_dependency 'logstash-codec-json' From 7028b9733520cca04fac58d12fd43c7be7262582 Mon Sep 17 00:00:00 2001 From: Andrew Cholakian Date: Tue, 7 Feb 2017 09:58:42 -0600 Subject: [PATCH 067/207] Upgrade es-ruby client to use 5.0.2 which supports correct content-type Fixes #55 Fixes #57 --- CHANGELOG.md | 8 ++++++++ logstash-input-elasticsearch.gemspec | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fafa5ca3..86551e0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 4.0.2 + - Bump ES client to 5.0.2 to get content-type: json behavior + - Revert unneeded manticore change + +## 4.0.1 + - Switch internal HTTP client to support TLSv1.2 + - Upgrade ES client internally to better support ES 5.x + ## 4.0.0 - Remove `scan` from list of options as this is no longer allowed in Elasticsearch 5.0. 
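
The practical effect of the client bump recorded in the changelog above is
that elasticsearch-ruby 5.0.2 and later send a Content-Type: application/json
header with request bodies, which the http.content_type.required setting in
Elasticsearch 5.3+ can demand. A hedged sketch of the same call shape this
plugin builds in register (the host value is illustrative):

  require "elasticsearch"

  client = Elasticsearch::Client.new(:hosts => ["localhost:9200"])
  client.search(:index  => "logstash-*",
                :body   => '{ "sort": [ "_doc" ] }',
                :size   => 1000,
                :scroll => "1m")
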
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 7640b3a5..6340ffaa 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|

   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.0.0'
+  s.version = '4.0.2'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Read from an Elasticsearch cluster, based on search query results"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -22,7 +22,7 @@ Gem::Specification.new do |s|
   # Gem dependencies
   s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"

-  s.add_runtime_dependency 'elasticsearch', ['>= 1.0.6', '~> 1.0']
+  s.add_runtime_dependency 'elasticsearch', ['>= 5.0.3', '< 6.0.0']

   s.add_runtime_dependency 'logstash-codec-json'


From e159cf8d23ed8630de6e24afcc38f1125058e4de Mon Sep 17 00:00:00 2001
From: DeDe Morton
Date: Fri, 24 Mar 2017 19:04:26 -0700
Subject: [PATCH 068/207] Add plugin version required to support sending
 Content-Type headers

---
 CHANGELOG.md | 3 +++
 lib/logstash/inputs/elasticsearch.rb | 10 ++++++++++
 logstash-input-elasticsearch.gemspec | 2 +-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 86551e0e..39d3feaf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.0.3
+ - Docs: Add requirement to use version 4.0.2 or higher to support sending Content-Type headers
+
 ## 4.0.2
  - Bump ES client to 5.0.2 to get content-type: json behavior
  - Revert unneeded manticore change

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index b1676094..df86b554 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -3,6 +3,16 @@
 require "logstash/namespace"
 require "base64"

+# .Compatibility Note
+# [NOTE]
+# ================================================================================
+# Starting with Elasticsearch 5.3, there's an {ref}modules-http.html[HTTP setting]
+# called `http.content_type.required`. If this option is set to `true`, and you
+# are using Logstash 2.4 through 5.2, you need to update the Elasticsearch input
+# plugin to version 4.0.2 or higher.
+#
+# ================================================================================
+#
 # Read from an Elasticsearch cluster, based on search query results.
 # This is useful for replaying test logs, reindexing, etc.
 #
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 6340ffaa..ecdcb0ce 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|

   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.0.2'
+  s.version = '4.0.3'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Read from an Elasticsearch cluster, based on search query results"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From 5ba7dba43fd1a1c7e2d93dfc4de9a82f26bef21e Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 6 Apr 2017 13:30:36 +0100 Subject: [PATCH 069/207] Fix scrolling requests by using json formatted bodies This makes scrolling incompatible with ES 1.x NOTE: It's already documented that version 5.x of this gem isn't compatible with ES 1.x --- CHANGELOG.md | 1 + lib/logstash/inputs/elasticsearch.rb | 2 +- spec/inputs/elasticsearch_spec.rb | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39d3feaf..56f448c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 4.0.3 - Docs: Add requirement to use version 4.0.2 or higher to support sending Content-Type headers + - Fix scrolling to use json bodies in the requests (this makes scrolling not work in ES 1.x) ## 4.0.2 - Bump ES client to 5.0.2 to get content-type: json behavior diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index df86b554..5be3f28f 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -194,6 +194,6 @@ def push_hit(hit, output_queue) end def scroll_request scroll_id - @client.scroll(:body => scroll_id, :scroll => @scroll) + @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll) end end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index cf8a97f6..b67a8dd7 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -63,7 +63,7 @@ client = Elasticsearch::Client.new expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) expect(client).to receive(:search).with(any_args).and_return(response) - expect(client).to receive(:scroll).with({ :body => "cXVlcnlUaGVuRmV0Y2g", :scroll=> "1m" }).and_return(scroll_reponse) + expect(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(scroll_reponse) event = input(config) do |pipeline, queue| queue.pop @@ -114,7 +114,7 @@ before do expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) expect(client).to receive(:search).with(any_args).and_return(response) - allow(client).to receive(:scroll).with({ :body => "cXVlcnlUaGVuRmV0Y2g", :scroll => "1m" }).and_return(scroll_reponse) + allow(client).to receive(:scroll).with({ :body => {:scroll_id => "cXVlcnlUaGVuRmV0Y2g"}, :scroll => "1m" }).and_return(scroll_reponse) end context 'when defining docinfo' do From 8bc521cca9c5c1de20212ad0252eda012477ed39 Mon Sep 17 00:00:00 2001 From: Jon Langdon Date: Sat, 15 Apr 2017 12:36:55 -0700 Subject: [PATCH 070/207] Fix incorrect ES output config option Fixes #63 --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 5be3f28f..d3f5b950 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -87,7 +87,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # output { # elasticsearch { # index => "copy-of-production.%{[@metadata][_index]}" - # index_type => "%{[@metadata][_type]}" + # document_type => "%{[@metadata][_type]}" # document_id => "%{[@metadata][_id]}" # } # } From 769f563f3fb190b2d0698df9b50bcdab0d75b8bc Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 28 Apr 2017 21:08:26 +0000 Subject: [PATCH 071/207] Initial doc move --- 
docs/index.asciidoc | 219 +++++++++++++++++++++++++++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 docs/index.asciidoc diff --git a/docs/index.asciidoc b/docs/index.asciidoc new file mode 100644 index 00000000..d84ed735 --- /dev/null +++ b/docs/index.asciidoc @@ -0,0 +1,219 @@ +:plugin: elasticsearch +:type: input + +/////////////////////////////////////////// +START - GENERATED VARIABLES, DO NOT EDIT! +/////////////////////////////////////////// +:version: %VERSION% +:release_date: %RELEASE_DATE% +:changelog_url: %CHANGELOG_URL% +:include_path: ../../../logstash/docs/include +/////////////////////////////////////////// +END - GENERATED VARIABLES, DO NOT EDIT! +/////////////////////////////////////////// + +[id="plugins-{type}-{plugin}"] + +=== Elasticsearch + +include::{include_path}/plugin_header.asciidoc[] + +==== Description + +.Compatibility Note +[NOTE] +================================================================================ +Starting with Elasticsearch 5.3, there's an {ref}modules-http.html[HTTP setting] +called `http.content_type.required`. If this option is set to `true`, and you +are using Logstash 2.4 through 5.2, you need to update the Elasticsearch input +plugin to version 4.0.2 or higher. + +================================================================================ + +Read from an Elasticsearch cluster, based on search query results. +This is useful for replaying test logs, reindexing, etc. + +Example: +[source,ruby] + input { + # Read all documents from Elasticsearch matching the given query + elasticsearch { + hosts => "localhost" + query => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }' + } + } + +This would create an Elasticsearch query with the following format: +[source,json] + curl 'http://localhost:9200/logstash-*/_search?&scroll=1m&size=1000' -d '{ + "query": { + "match": { + "statuscode": 200 + } + }, + "sort": [ "_doc" ] + }' + + +[id="plugins-{type}s-{plugin}-options"] +==== Elasticsearch Input Configuration Options + +This plugin supports the following configuration options plus the <> described later. + +[cols="<,<,<",options="header",] +|======================================================================= +|Setting |Input type|Required +| <> |a valid filesystem path|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +|======================================================================= + +Also see <> for a list of options supported by all +input plugins. + +  + +[id="plugins-{type}s-{plugin}-ca_file"] +===== `ca_file` + + * Value type is <> + * There is no default value for this setting. + +SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary + +[id="plugins-{type}s-{plugin}-docinfo"] +===== `docinfo` + + * Value type is <> + * Default value is `false` + +If set, include Elasticsearch document information such as index, type, and +the id in the event. + +It might be important to note, with regards to metadata, that if you're +ingesting documents with the intent to re-index them (or just update them) +that the `action` option in the elasticsearch output wants to know how to +handle those things. It can be dynamically assigned with a field +added to the metadata. 
+ +Example +[source, ruby] + input { + elasticsearch { + hosts => "es.production.mysite.org" + index => "mydata-2018.09.*" + query => "*" + size => 500 + scroll => "5m" + docinfo => true + } + } + output { + elasticsearch { + index => "copy-of-production.%{[@metadata][_index]}" + document_type => "%{[@metadata][_type]}" + document_id => "%{[@metadata][_id]}" + } + } + + +[id="plugins-{type}s-{plugin}-docinfo_fields"] +===== `docinfo_fields` + + * Value type is <> + * Default value is `["_index", "_type", "_id"]` + +List of document metadata to move to the `docinfo_target` field +To learn more about Elasticsearch metadata fields read +http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html + +[id="plugins-{type}s-{plugin}-docinfo_target"] +===== `docinfo_target` + + * Value type is <> + * Default value is `"@metadata"` + +Where to move the Elasticsearch document information by default we use the @metadata field. + +[id="plugins-{type}s-{plugin}-hosts"] +===== `hosts` + + * Value type is <> + * There is no default value for this setting. + +List of elasticsearch hosts to use for querying. +each host can be either IP, HOST, IP:port or HOST:port +port defaults to 9200 + +[id="plugins-{type}s-{plugin}-index"] +===== `index` + + * Value type is <> + * Default value is `"logstash-*"` + +The index or alias to search. + +[id="plugins-{type}s-{plugin}-password"] +===== `password` + + * Value type is <> + * There is no default value for this setting. + +Basic Auth - password + +[id="plugins-{type}s-{plugin}-query"] +===== `query` + + * Value type is <> + * Default value is `"{ \"sort\": [ \"_doc\" ] }"` + +The query to be executed. Read the Elasticsearch query DSL documentation +for more info +https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html + +[id="plugins-{type}s-{plugin}-scroll"] +===== `scroll` + + * Value type is <> + * Default value is `"1m"` + +This parameter controls the keepalive time in seconds of the scrolling +request and initiates the scrolling process. The timeout applies per +round trip (i.e. between the previous scroll request, to the next). + +[id="plugins-{type}s-{plugin}-size"] +===== `size` + + * Value type is <> + * Default value is `1000` + +This allows you to set the maximum number of hits returned per scroll. + +[id="plugins-{type}s-{plugin}-ssl"] +===== `ssl` + + * Value type is <> + * Default value is `false` + +SSL + +[id="plugins-{type}s-{plugin}-user"] +===== `user` + + * Value type is <> + * There is no default value for this setting. + +Basic Auth - username + + + +include::{include_path}/{type}.asciidoc[] diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index ecdcb0ce..d3663621 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -11,7 +11,7 @@ Gem::Specification.new do |s| s.require_paths = ["lib"] # Files - s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT'] + s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"] # Tests s.test_files = s.files.grep(%r{^(test|spec|features)/}) From 96f57a602dccda974a0cb124e898f663fb75112a Mon Sep 17 00:00:00 2001 From: 7lima Date: Tue, 26 Jul 2016 14:43:22 +0200 Subject: [PATCH 072/207] Doc: some minor corrections. 
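
This clarifies, among other things, that each entry in hosts may be an IP, a
hostname, IP:port, or HOST:port, with the port defaulting to 9200. A mixed
list such as the following sketch is therefore valid; the hostnames and the
non-default port are placeholders:

  input {
    elasticsearch {
      hosts => ["127.0.0.1", "es.example.org", "10.0.0.5:9201", "es2.example.org:9201"]
    }
  }
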
--- lib/logstash/inputs/elasticsearch.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index d3f5b950..65227e19 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -43,8 +43,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base default :codec, "json" # List of elasticsearch hosts to use for querying. - # each host can be either IP, HOST, IP:port or HOST:port - # port defaults to 9200 + # Each host can be either IP, HOST, IP:port or HOST:port. + # Port defaults to 9200 config :hosts, :validate => :array # The index or alias to search. @@ -94,10 +94,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # config :docinfo, :validate => :boolean, :default => false - # Where to move the Elasticsearch document information by default we use the @metadata field. + # Where to move the Elasticsearch document information. By default we use the @metadata field. config :docinfo_target, :validate=> :string, :default => LogStash::Event::METADATA - # List of document metadata to move to the `docinfo_target` field + # List of document metadata to move to the `docinfo_target` field. # To learn more about Elasticsearch metadata fields read # http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html config :docinfo_fields, :validate => :array, :default => ['_index', '_type', '_id'] From 683738925764facc799d1587246b5622005b89a8 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Fri, 26 May 2017 13:01:03 -0400 Subject: [PATCH 073/207] new build system for jruby9k --- .travis.yml | 19 +++++++++++++++---- Gemfile | 8 +++++++- ci/build.sh | 21 +++++++++++++++++++++ ci/setup.sh | 26 ++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 5 deletions(-) create mode 100755 ci/build.sh create mode 100755 ci/setup.sh diff --git a/.travis.yml b/.travis.yml index 5c63af6e..f274087f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,20 @@ sudo: false language: ruby cache: bundler +env: rvm: -- jruby-1.7.25 -script: -- bundle exec rspec spec + - jruby-1.7.25 +matrix: + include: + - rvm: jruby-1.7.25 + env: LOGSTASH_BRANCH=master + - rvm: jruby-1.7.25 + env: LOGSTASH_BRANCH=5.x + - rvm: jruby-9.1.9.0 + env: LOGSTASH_BRANCH=feature/9000 + allow_failures: + - rvm: jruby-9.1.9.0 + fast_finish: true +install: true +script: ci/build.sh jdk: oraclejdk8 -before_install: [] diff --git a/Gemfile b/Gemfile index 2b03d18e..93e5e5de 100644 --- a/Gemfile +++ b/Gemfile @@ -1,4 +1,10 @@ source 'https://rubygems.org/' -# Specify your gem's dependencies in logstash-mass_effect.gemspec gemspec + +logstash_path = "../../logstash" + +if Dir.exist?(logstash_path) && ENV["LOGSTASH_SOURCE"] == 1 + gem 'logstash-core', :path => "#{logstash_path}/logstash-core" + gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api" +end diff --git a/ci/build.sh b/ci/build.sh new file mode 100755 index 00000000..076e9088 --- /dev/null +++ b/ci/build.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# version: 1 +######################################################## +# +# AUTOMATICALLY GENERATED! 
DO NOT EDIT
+#
+########################################################
+set -e
+
+echo "Starting build process in: `pwd`"
+./ci/setup.sh
+
+if [[ -f "ci/run.sh" ]]; then
+  echo "Running custom build script in: `pwd`/ci/run.sh"
+  ./ci/run.sh
+else
+  echo "Running default build scripts in: `pwd`/ci/build.sh"
+  bundle install
+  bundle exec rake vendor
+  bundle exec rspec spec
+fi
diff --git a/ci/setup.sh b/ci/setup.sh
new file mode 100755
index 00000000..835fa437
--- /dev/null
+++ b/ci/setup.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# version: 1
+########################################################
+#
+# AUTOMATICALLY GENERATED! DO NOT EDIT
+#
+########################################################
+set -e
+if [ "$LOGSTASH_BRANCH" ]; then
+  echo "Building plugin using Logstash source"
+  BASE_DIR=`pwd`
+  echo "Checking out branch: $LOGSTASH_BRANCH"
+  git clone -b $LOGSTASH_BRANCH https://github.com/elastic/logstash.git ../../logstash --depth 1
+  printf "Checked out Logstash revision: %s\n" "$(git -C ../../logstash rev-parse HEAD)"
+  cd ../../logstash
+  echo "Building plugins with Logstash version:"
+  cat versions.yml
+  echo "---"
+  # We need to build the jars for that specific version
+  echo "Running gradle assemble in: `pwd`"
+  ./gradlew assemble
+  cd $BASE_DIR
+  export LOGSTASH_SOURCE=1
+else
+  echo "Building plugin using released gems on rubygems"
+fi

From 1771df36905d233cccb446dffcf5763e64a2d6d4 Mon Sep 17 00:00:00 2001
From: Pier-Hugues Pellerin
Date: Wed, 31 May 2017 16:44:56 -0400
Subject: [PATCH 074/207] Adjusting the build scripts to correctly load the
 logstash source and allow people to override it

---
 Gemfile     | 5 +++--
 ci/build.sh | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Gemfile b/Gemfile
index 93e5e5de..32cc6fbb 100644
--- a/Gemfile
+++ b/Gemfile
@@ -2,9 +2,10 @@ source 'https://rubygems.org/'

 gemspec

-logstash_path = "../../logstash"
+logstash_path = ENV["LOGSTASH_PATH"] || "../../logstash"
+use_logstash_source = ENV["LOGSTASH_SOURCE"] && ENV["LOGSTASH_SOURCE"].to_s == "1"

-if Dir.exist?(logstash_path) && ENV["LOGSTASH_SOURCE"] == 1
+if Dir.exist?(logstash_path) && use_logstash_source
   gem 'logstash-core', :path => "#{logstash_path}/logstash-core"
   gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api"
 end
diff --git a/ci/build.sh b/ci/build.sh
index 076e9088..06caffdc 100755
--- a/ci/build.sh
+++ b/ci/build.sh
@@ -8,11 +8,11 @@ set -e

 echo "Starting build process in: `pwd`"
-./ci/setup.sh
+source ./ci/setup.sh

 if [[ -f "ci/run.sh" ]]; then
   echo "Running custom build script in: `pwd`/ci/run.sh"
-  ./ci/run.sh
+  source ./ci/run.sh
 else
   echo "Running default build scripts in: `pwd`/ci/build.sh"

From 5ba7dba43fd1a1c7e2d93dfc4de9a82f26bef21e Mon Sep 17 00:00:00 2001
From: Pier-Hugues Pellerin
Date: Thu, 2 Jun 2017 16:09:45 -0400
Subject: [PATCH 075/207] Use the new_pipeline_from_string in devutils

We have changed the method signature between 5.x and master, so I have
refactored the test to use the new new_pipeline_from_string helper defined
in devutils.
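
For reference, the refactored spec builds its pipeline through the helper
instead of instantiating LogStash::Pipeline directly; the pattern looks
roughly like this, where the config string is a trimmed stand-in for the real
fixture:

  config = %q[
    input {
      elasticsearch {
        hosts => ["localhost"]
        docinfo => true
      }
    }
  ]

  pipeline = new_pipeline_from_string(config) # was: LogStash::Pipeline.new(config)
  queue = Queue.new
  pipeline.instance_eval do
    @output_func = lambda { |event| queue << event }
  end
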
Fixes #72 --- spec/inputs/elasticsearch_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index b67a8dd7..174a98bf 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -168,7 +168,7 @@ } ] - pipeline = LogStash::Pipeline.new(config_metadata_with_string) + pipeline = new_pipeline_from_string(config_metadata_with_string) queue = Queue.new pipeline.instance_eval do @output_func = lambda { |event| queue << event } From 37f295236f72536d4c083ef91c9bebfb735accb6 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 13 Jun 2017 09:10:36 -0400 Subject: [PATCH 076/207] update .travis.yml for jruby9k jobs --- .travis.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index f274087f..59c937eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,19 +2,15 @@ sudo: false language: ruby cache: bundler -env: +env: rvm: - - jruby-1.7.25 +- jruby-1.7.25 matrix: include: - - rvm: jruby-1.7.25 - env: LOGSTASH_BRANCH=master - - rvm: jruby-1.7.25 - env: LOGSTASH_BRANCH=5.x - - rvm: jruby-9.1.9.0 - env: LOGSTASH_BRANCH=feature/9000 - allow_failures: - - rvm: jruby-9.1.9.0 + - rvm: jruby-9.1.10.0 + env: LOGSTASH_BRANCH=master + - rvm: jruby-1.7.25 + env: LOGSTASH_BRANCH=5.x fast_finish: true install: true script: ci/build.sh From 735f9edb87b629dbeecf72187ca83bd1f77de89e Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Thu, 15 Jun 2017 20:00:55 -0400 Subject: [PATCH 077/207] update plugin header for better search results --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index d84ed735..6152e0f1 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -14,7 +14,7 @@ END - GENERATED VARIABLES, DO NOT EDIT! [id="plugins-{type}-{plugin}"] -=== Elasticsearch +=== Elasticsearch input plugin include::{include_path}/plugin_header.asciidoc[] From 7da51cc7366663825facf98a2b033dd55be2ab42 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Thu, 22 Jun 2017 22:30:32 -0400 Subject: [PATCH 078/207] [skip ci] Updating the plugin doc --- docs/index.asciidoc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 6152e0f1..01f4a94d 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -7,7 +7,7 @@ START - GENERATED VARIABLES, DO NOT EDIT! :version: %VERSION% :release_date: %RELEASE_DATE% :changelog_url: %CHANGELOG_URL% -:include_path: ../../../logstash/docs/include +:include_path: ../../../../logstash/docs/include /////////////////////////////////////////// END - GENERATED VARIABLES, DO NOT EDIT! /////////////////////////////////////////// @@ -58,7 +58,7 @@ This would create an Elasticsearch query with the following format: [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input Configuration Options -This plugin supports the following configuration options plus the <> described later. +This plugin supports the following configuration options plus the <> described later. 
[cols="<,<,<",options="header",] |======================================================================= @@ -77,7 +77,7 @@ This plugin supports the following configuration options plus the <> |<>|No |======================================================================= -Also see <> for a list of options supported by all +Also see <> for a list of options supported by all input plugins.   @@ -216,4 +216,5 @@ Basic Auth - username -include::{include_path}/{type}.asciidoc[] +[id="plugins-{type}s-{plugin}-common-options"] +include::{include_path}/{type}.asciidoc[] \ No newline at end of file From 0c219df191ce038d1f42a5fcd65a48a5d2861b6b Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Thu, 22 Jun 2017 22:54:08 -0400 Subject: [PATCH 079/207] bump patch level for doc generation --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d3663621..5a814f12 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.0.3' + s.version = '4.0.4' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From efe56f819649941cd812c8fc54c855aeb7320682 Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Mon, 26 Jun 2017 21:24:11 -0400 Subject: [PATCH 080/207] [skip ci] Updating the plugin id in the doc to match the index in the docbook --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 01f4a94d..abe4a358 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -12,7 +12,7 @@ START - GENERATED VARIABLES, DO NOT EDIT! END - GENERATED VARIABLES, DO NOT EDIT! /////////////////////////////////////////// -[id="plugins-{type}-{plugin}"] +[id="plugins-{type}s-{plugin}"] === Elasticsearch input plugin From a8db8e231232b4ad368c29d407bdb149f5972e05 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 26 Jul 2017 12:38:07 +0100 Subject: [PATCH 081/207] on travis test against 5.6 and 6.0 logstash-core --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 59c937eb..7af01f7d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,9 +9,11 @@ matrix: include: - rvm: jruby-9.1.10.0 env: LOGSTASH_BRANCH=master + - rvm: jruby-9.1.10.0 + env: LOGSTASH_BRANCH=6.x - rvm: jruby-1.7.25 - env: LOGSTASH_BRANCH=5.x + env: LOGSTASH_BRANCH=5.6 fast_finish: true install: true script: ci/build.sh -jdk: oraclejdk8 +jdk: oraclejdk8 \ No newline at end of file From 5a2c8544d92bff176e6e5cdc9257a5cc5f1cde47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20B=C3=A4ck?= Date: Wed, 2 Aug 2017 22:26:37 +0200 Subject: [PATCH 082/207] docs: Various language improvements and elaborations. (#75) --- docs/index.asciidoc | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index abe4a358..28bcec39 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -88,7 +88,8 @@ input plugins. * Value type is <> * There is no default value for this setting. 
-SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary +SSL Certificate Authority file in PEM encoded format, must also +include any chain certificates as necessary. [id="plugins-{type}s-{plugin}-docinfo"] ===== `docinfo` @@ -111,7 +112,7 @@ Example elasticsearch { hosts => "es.production.mysite.org" index => "mydata-2018.09.*" - query => "*" + query => '{ "query": { "query_string": { "query": "*" } } }' size => 500 scroll => "5m" docinfo => true @@ -132,9 +133,11 @@ Example * Value type is <> * Default value is `["_index", "_type", "_id"]` -List of document metadata to move to the `docinfo_target` field -To learn more about Elasticsearch metadata fields read -http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html +If document metadata storage is requested by enabling the `docinfo` +option, this option lists the metadata fields to save in the current +event. See +[Document Metadata](http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html) +in the Elasticsearch documentation for more information. [id="plugins-{type}s-{plugin}-docinfo_target"] ===== `docinfo_target` @@ -142,7 +145,9 @@ http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_meta * Value type is <> * Default value is `"@metadata"` -Where to move the Elasticsearch document information by default we use the @metadata field. +If document metadata storage is requested by enabling the `docinfo` +option, this option names the field under which to store the metadata +fields as subfields. [id="plugins-{type}s-{plugin}-hosts"] ===== `hosts` @@ -150,9 +155,9 @@ Where to move the Elasticsearch document information by default we use the @meta * Value type is <> * There is no default value for this setting. -List of elasticsearch hosts to use for querying. -each host can be either IP, HOST, IP:port or HOST:port -port defaults to 9200 +List of one or more Elasticsearch hosts to use for querying. Each host +can be either IP, HOST, IP:port, or HOST:port. The port defaults to +9200. [id="plugins-{type}s-{plugin}-index"] ===== `index` @@ -168,17 +173,19 @@ The index or alias to search. * Value type is <> * There is no default value for this setting. -Basic Auth - password +The password to use together with the username in the `user` option +when authenticating to the Elasticsearch server. If set to an empty +string authentication will be disabled. [id="plugins-{type}s-{plugin}-query"] ===== `query` * Value type is <> - * Default value is `"{ \"sort\": [ \"_doc\" ] }"` + * Default value is `'{ "sort": [ "_doc" ] }'` -The query to be executed. Read the Elasticsearch query DSL documentation -for more info -https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html +The query to be executed. Read the +[Elasticsearch query DSL documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html) +for more information. [id="plugins-{type}s-{plugin}-scroll"] ===== `scroll` @@ -204,7 +211,8 @@ This allows you to set the maximum number of hits returned per scroll. * Value type is <> * Default value is `false` -SSL +If enabled, SSL will be used when communicating with the Elasticsearch +server (i.e. HTTPS will be used instead of plain HTTP). [id="plugins-{type}s-{plugin}-user"] ===== `user` @@ -212,9 +220,11 @@ SSL * Value type is <> * There is no default value for this setting. 
-Basic Auth - username +The username to use together with the password in the `password` +option when authenticating to the Elasticsearch server. If set to an +empty string authentication will be disabled. [id="plugins-{type}s-{plugin}-common-options"] -include::{include_path}/{type}.asciidoc[] \ No newline at end of file +include::{include_path}/{type}.asciidoc[] From 0fd0f0a2342949993026592599fc24e7fbde462d Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 15 Aug 2017 10:01:03 -0700 Subject: [PATCH 083/207] Version bump For https://github.com/elastic/logstash/issues/7993 [ci skip] --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56f448c1..a664d1b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.0.5 + - Fix some documentation issues + ## 4.0.3 - Docs: Add requirement to use version 4.0.2 or higher to support sending Content-Type headers - Fix scrolling to use json bodies in the requests (this makes scrolling not work in ES 1.x) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 5a814f12..aadae3d9 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.0.4' + s.version = '4.0.5' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 936e925f0cdf0f09e3b19620c86e87232bd4f847 Mon Sep 17 00:00:00 2001 From: DeDe Morton Date: Mon, 11 Sep 2017 16:34:54 -0700 Subject: [PATCH 084/207] Fix broken link Fixes #77 --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 2 +- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a664d1b3..d2547199 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.0.6 + - Docs: Fix link syntax + ## 4.0.5 - Fix some documentation issues diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 28bcec39..8a2d486c 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -23,7 +23,7 @@ include::{include_path}/plugin_header.asciidoc[] .Compatibility Note [NOTE] ================================================================================ -Starting with Elasticsearch 5.3, there's an {ref}modules-http.html[HTTP setting] +Starting with Elasticsearch 5.3, there's an {ref}/modules-http.html[HTTP setting] called `http.content_type.required`. If this option is set to `true`, and you are using Logstash 2.4 through 5.2, you need to update the Elasticsearch input plugin to version 4.0.2 or higher. diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index aadae3d9..5d463b27 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.0.5' + s.version = '4.0.6' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From 97b50de1f38e984b1dc65eecc98f24420b40b8ab Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Thu, 26 Oct 2017 02:31:20 +0000 Subject: [PATCH 085/207] enable use of metadata fields in `add_field` decorations By performing event decorations after the metadata field has been populated we enable the supported `add_field` decoration to reference metadata fields. Resolves logstash-plugins/logstash-input-elasticsearch#76 Fixes #78 --- CHANGELOG.md | 3 +++ lib/logstash/inputs/elasticsearch.rb | 3 ++- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 21 +++++++++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2547199..bdcfed29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.1.0 + - Enable use of docinfo (@metadata) fields in `add_field` decorations + ## 4.0.6 - Docs: Fix link syntax diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 65227e19..fbcad1c5 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -170,7 +170,6 @@ def process_next_scroll(output_queue, scroll_id) def push_hit(hit, output_queue) event = LogStash::Event.new(hit['_source']) - decorate(event) if @docinfo # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. @@ -190,6 +189,8 @@ def push_hit(hit, output_queue) event.set(@docinfo_target, docinfo_target) end + decorate(event) + output_queue << event end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 5d463b27..a2158642 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.0.6' + s.version = '4.1.0' s.licenses = ['Apache License (2.0)'] s.summary = "Read from an Elasticsearch cluster, based on search query results" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 174a98bf..6626f1be 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -228,6 +228,27 @@ expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') expect(event.get("[@metadata][_id]")).to eq(nil) end + + it 'should be able to reference metadata fields in `add_field` decorations' do + config = %q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + add_field => { + 'identifier' => "foo:%{[@metadata][_type]}:%{[@metadata][_id]}" + } + } + } + ] + + event = input(config) do |pipeline, queue| + queue.pop + end + + expect(event.get('identifier')).to eq('foo:logs:C5b2xLQwTZa76jBmHIbwHQ') + end end context "when not defining the docinfo" do From c7f3bebe9525550bfc4b7c1832e4ccc43995b98f Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Fri, 27 Oct 2017 17:24:57 -0500 Subject: [PATCH 086/207] Travis - add 6.0 build, remove default JRuby 1.7 build, bump RVM versions --- .travis.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7af01f7d..1458a3b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,18 +2,17 @@ sudo: false language: ruby cache: bundler -env: -rvm: -- jruby-1.7.25 matrix: include: - - rvm: jruby-9.1.10.0 + - rvm: jruby-9.1.13.0 env: LOGSTASH_BRANCH=master - - rvm: jruby-9.1.10.0 + - rvm: jruby-9.1.13.0 env: LOGSTASH_BRANCH=6.x - - rvm: jruby-1.7.25 + - rvm: jruby-9.1.13.0 + env: LOGSTASH_BRANCH=6.0 + - rvm: jruby-1.7.27 env: LOGSTASH_BRANCH=5.6 fast_finish: true install: true script: ci/build.sh -jdk: oraclejdk8 \ No newline at end of file +jdk: oraclejdk8 From 2c69f1db2ed4a3457f063b38ebebcb43ab617b54 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Tue, 7 Nov 2017 11:18:31 +0000 Subject: [PATCH 087/207] [skip ci] update gemspec summary --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdcfed29..9be65e2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.1.1 + - Update gemspec summary + ## 4.1.0 - Enable use of docinfo (@metadata) fields in `add_field` decorations diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index a2158642..a231093d 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,9 +1,9 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.1.0' + s.version = '4.1.1' s.licenses = ['Apache License (2.0)'] - s.summary = "Read from an Elasticsearch cluster, based on search query results" + s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program"
  s.authors = ["Elastic"]
  s.email = 'info@elastic.co'

From a36b933c7995f06a8e9dab158a9655a64b57c89f Mon Sep 17 00:00:00 2001
From: DeDe Morton
Date: Tue, 7 Nov 2017 16:43:34 -0800
Subject: [PATCH 088/207] Deprecate document_type in the docs

---
 CHANGELOG.md                         | 3 +++
 docs/index.asciidoc                  | 9 +++++++--
 logstash-input-elasticsearch.gemspec | 2 +-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9be65e2e..1c0f1e93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.1.2
+  - Docs: Deprecate `document_type`
+
 ## 4.1.1
   - Update gemspec summary
 
diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 8a2d486c..43a4c9d5 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -127,6 +127,11 @@ Example
     }
 
 
+NOTE: Starting with Logstash 6.0, the `document_type` option is
+deprecated due to the
+https://www.elastic.co/guide/en/elasticsearch/reference/6.0/removal-of-types.html[removal of types in Elasticsearch 6.0].
+It will be removed in the next major version of Logstash.
+
 [id="plugins-{type}s-{plugin}-docinfo_fields"]
 ===== `docinfo_fields`
 
@@ -136,7 +141,7 @@ Example
 
 If document metadata storage is requested by enabling the `docinfo`
 option, this option lists the metadata fields to save in the current
 event. See
-[Document Metadata](http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html)
+http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html[Document Metadata]
 in the Elasticsearch documentation for more information.
 
 [id="plugins-{type}s-{plugin}-docinfo_target"]
 ===== `docinfo_target`
@@ -184,7 +189,7 @@ string authentication will be disabled.
 
 * Value type is <>
 * Default value is `'{ "sort": [ "_doc" ] }'`
 
 The query to be executed. Read the
-[Elasticsearch query DSL documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)
+https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html[Elasticsearch query DSL documentation]
 for more information.
 
 [id="plugins-{type}s-{plugin}-scroll"]
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index a231093d..aac81df0 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.1.1'
+  s.version = '4.1.2'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

From 3d756b0235617b41cb170fc0fb6216824d290855 Mon Sep 17 00:00:00 2001
From: Joao Duarte
Date: Mon, 8 Jan 2018 21:30:56 +0000
Subject: [PATCH 089/207] [skip ci] update license to 2018

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index 43976b73..2162c9b7 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2012–2016 Elasticsearch
+Copyright (c) 2012-2018 Elasticsearch
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
From 16171278d403ee8053f8a22e39c4c2a1769daee0 Mon Sep 17 00:00:00 2001
From: Davide Cavestro
Date: Mon, 15 Jan 2018 13:24:03 +0100
Subject: [PATCH 090/207] Scheduling support (#81)

* Added logic to support scheduling (copied from logstash-input-jdbc)
* documented new configuration property
* some tests
---
 docs/index.asciidoc                  | 35 +++++++++++++
 lib/logstash/inputs/elasticsearch.rb | 50 ++++++++++++++++++-
 logstash-input-elasticsearch.gemspec |  6 +++
 spec/inputs/elasticsearch_spec.rb    | 75 +++++++++++++++++++++++++++-
 4 files changed, 163 insertions(+), 3 deletions(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 43a4c9d5..18394987 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -32,6 +32,9 @@ plugin to version 4.0.2 or higher.
 
 Read from an Elasticsearch cluster, based on search query results.
 This is useful for replaying test logs, reindexing, etc.
+You can periodically schedule ingestion using a cron syntax
+(see `schedule` setting) or run the query one time to load
+data into Logstash.
 
 Example:
 [source,ruby]
@@ -55,6 +58,25 @@ This would create an Elasticsearch query with the following format:
     }'
 
 
+==== Scheduling
+
+Input from this plugin can be scheduled to run periodically according to a specific
+schedule. This scheduling syntax is powered by https://github.com/jmettraux/rufus-scheduler[rufus-scheduler].
+The syntax is cron-like with some extensions specific to Rufus (e.g. timezone support).
+
+Examples:
+
+|==========================================================
+| `* 5 * 1-3 *` | will execute every minute of 5am every day of January through March.
+| `0 * * * *` | will execute on the 0th minute of every hour every day.
+| `0 6 * * * America/Chicago` | will execute at 6:00am (UTC/GMT -5) every day.
+|==========================================================
+
+
+Further documentation describing this syntax can be found
+https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here].
+
+
 [id="plugins-{type}s-{plugin}-options"]
 ==== Elasticsearch Input Configuration Options
 
@@ -71,6 +93,7 @@ This plugin supports the following configuration options plus the <>
 | <> |<>|No
 | <> |<>|No
 | <> |<>|No
+| <> |<>|No
 | <> |<>|No
 | <> |<>|No
 | <> |<>|No
 |=======================================================================
@@ -192,6 +215,18 @@ The query to be executed. Read the
 https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html[Elasticsearch query DSL documentation]
 for more information.
 
+[id="plugins-{type}s-{plugin}-schedule"]
+===== `schedule`
+
+ * Value type is <>
+ * There is no default value for this setting.
+
+Schedule of when to periodically run the statement, in Cron format
+for example: "* * * * *" (execute query every minute, on the minute)
+
+There is no schedule by default. If no schedule is given, then the statement is run
+exactly once.
+
 [id="plugins-{type}s-{plugin}-scroll"]
 ===== `scroll`
 
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index fbcad1c5..bab4bd7d 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -15,6 +15,8 @@
 #
 # Read from an Elasticsearch cluster, based on search query results.
 # This is useful for replaying test logs, reindexing, etc.
+# It also supports periodically scheduling the query
+# using a cron syntax (see `schedule` setting).
 #
 # Example:
 # [source,ruby]
@@ -37,6 +39,24 @@
 #         "sort": [ "_doc" ]
 #     }'
 #
+# ==== Scheduling
+#
+# Input from this plugin can be scheduled to run periodically according to a specific
+# schedule. This scheduling syntax is powered by https://github.com/jmettraux/rufus-scheduler[rufus-scheduler].
+# The syntax is cron-like with some extensions specific to Rufus (e.g. timezone support).
+#
+# Examples:
+#
+# |==========================================================
+# | `* 5 * 1-3 *` | will execute every minute of 5am every day of January through March.
+# | `0 * * * *` | will execute on the 0th minute of every hour every day.
+# | `0 6 * * * America/Chicago` | will execute at 6:00am (UTC/GMT -5) every day.
+# |==========================================================
+#
+#
+# Further documentation describing this syntax can be found https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here].
+#
+#
 class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   config_name "elasticsearch"
 
@@ -114,8 +134,16 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary
   config :ca_file, :validate => :path
 
+  # Schedule of when to periodically run the statement, in Cron format
+  # for example: "* * * * *" (execute query every minute, on the minute)
+  #
+  # There is no schedule by default. If no schedule is given, then the statement is run
+  # exactly once.
+  config :schedule, :validate => :string
+
   def register
     require "elasticsearch"
+    require "rufus/scheduler"
 
     @options = {
       :index => @index,
@@ -147,7 +175,27 @@ def register
     @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options)
   end
 
+  def run(output_queue)
+    if @schedule
+      @scheduler = Rufus::Scheduler.new(:max_work_threads => 1)
+      @scheduler.cron @schedule do
+        do_run(output_queue)
+      end
+
+      @scheduler.join
+    else
+      do_run(output_queue)
+    end
+  end
+
+  def stop
+    @scheduler.stop if @scheduler
+  end
+
+  private
+
+  def do_run(output_queue)
     # get first wave of data
     r = @client.search(@options)
 
@@ -160,8 +208,6 @@ def run(output_queue)
     end
   end
 
-  private
-
   def process_next_scroll(output_queue, scroll_id)
     r = scroll_request(scroll_id)
     r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index aac81df0..18387b76 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -25,6 +25,12 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'elasticsearch', ['>= 5.0.3', '< 6.0.0']
 
   s.add_runtime_dependency 'logstash-codec-json'
+  s.add_runtime_dependency 'logstash-codec-plain'
+  s.add_runtime_dependency 'sequel'
+  s.add_runtime_dependency 'tzinfo'
+  s.add_runtime_dependency 'tzinfo-data'
+  s.add_runtime_dependency 'rufus-scheduler'
 
   s.add_development_dependency 'logstash-devutils'
+  s.add_development_dependency 'timecop'
 end
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 6626f1be..3363925e 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -2,12 +2,23 @@
 require "logstash/devutils/rspec/spec_helper"
 require "logstash/inputs/elasticsearch"
 require "elasticsearch"
+require "timecop"
+require "stud/temporary"
+require "time"
+require "date"
 
 describe LogStash::Inputs::Elasticsearch do
 
+  let(:plugin) { LogStash::Inputs::Elasticsearch.new(config) }
+  let(:queue) { Queue.new }
+
   it_behaves_like "an interruptible input plugin" do
     let(:esclient) { double("elasticsearch-client") }
-    let(:config) { { } }
+    let(:config) do
+      {
+        "schedule" => "* * * * * UTC"
+      }
+    end
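+    # A schedule is supplied here so that `run` blocks on the scheduler rather
+    # than returning immediately, giving the shared example a long-running
+    # input to interrupt; "* * * * * UTC" is a five-field cron expression
+    # (fires every minute) pinned to the UTC zone.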
  before :each do
    allow(Elasticsearch::Client).to receive(:new).and_return(esclient)
@@ -271,4 +282,66 @@
     end
   end
 end
+
+  context "when scheduling" do
+    let(:config) do
+      {
+        "hosts" => ["localhost"],
+        "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }',
+        "schedule" => "* * * * * UTC"
+      }
+    end
+
+    response = {
+      "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g",
+      "took" => 27,
+      "timed_out" => false,
+      "_shards" => {
+        "total" => 169,
+        "successful" => 169,
+        "failed" => 0
+      },
+      "hits" => {
+        "total" => 1,
+        "max_score" => 1.0,
+        "hits" => [ {
+          "_index" => "logstash-2014.10.12",
+          "_type" => "logs",
+          "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
+          "_score" => 1.0,
+          "_source" => { "message" => ["ohayo"] }
+        } ]
+      }
+    }
+
+    scroll_response = {
+      "_scroll_id" => "r453Wc1jh0caLJhSDg",
+      "hits" => { "hits" => [] }
+    }
+
+    before do
+      plugin.register
+    end
+
+    it "should properly schedule" do
+
+      Timecop.travel(Time.new(2000))
+      Timecop.scale(60)
+      runner = Thread.new do
+        expect(plugin).to receive(:do_run) {
+          queue << LogStash::Event.new({})
+        }.at_least(:twice)
+
+        plugin.run(queue)
+      end
+      sleep 3
+      plugin.stop
+      runner.kill
+      runner.join
+      expect(queue.size).to eq(2)
+      Timecop.return
+    end
+
+  end
+end

From f6cc41a44f6d50db214d7d5cbb751720b242f1f7 Mon Sep 17 00:00:00 2001
From: Joao Duarte
Date: Mon, 15 Jan 2018 15:23:59 +0000
Subject: [PATCH 091/207] bump to 4.2.0

---
 CHANGELOG.md                         | 3 ++-
 logstash-input-elasticsearch.gemspec | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c0f1e93..f2dd2ef7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
-## 4.1.2
+## 4.2.0
   - Docs: Deprecate `document_type`
+  - Add support for scheduling periodic execution of the query #81
 
 ## 4.1.1
   - Update gemspec summary
 
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 18387b76..5bd4fa0f 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.1.2'
+  s.version = '4.2.0'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

From fa3b48c0b880eb7c07f57cb90090cf139ac0abb8 Mon Sep 17 00:00:00 2001
From: rolfchess <16006315+rolfchess@users.noreply.github.com>
Date: Thu, 15 Mar 2018 14:11:42 +0100
Subject: [PATCH 092/207] Improve description of the index (#92)

Directs the user to information on how to reference multiple indices.
---
 docs/index.asciidoc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 18394987..e0dd8e69 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -193,7 +193,11 @@ can be either IP, HOST, IP:port, or HOST:port. The port defaults to
 
 * Value type is <>
 * Default value is `"logstash-*"`
 
-The index or alias to search.
+The index or alias to search. See
+https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html[Multi Indices documentation]
+in the Elasticsearch documentation for more information on how to reference
+multiple indices.
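+
+As an illustration only (the comma-separated value below is hypothetical), a
+single `index` value can name several indices or patterns at once:
+
+[source,ruby]
+    input {
+      elasticsearch {
+        index => "logstash-*,old-logstash-*"
+      }
+    }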
+ [id="plugins-{type}s-{plugin}-password"] ===== `password` From 7ee7d8184575ee00b063a0c9d442a737f5c074ae Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Fri, 6 Apr 2018 23:27:10 +0100 Subject: [PATCH 093/207] set default_codec doc attribute --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 3 +++ logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2dd2ef7..eed1c666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.2.1 + - Docs: Set the default_codec doc attribute. + ## 4.2.0 - Docs: Deprecate `document_type` - Add support for scheduling periodic execution of the query #81 diff --git a/docs/index.asciidoc b/docs/index.asciidoc index e0dd8e69..210e4267 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -1,5 +1,6 @@ :plugin: elasticsearch :type: input +:default_codec: json /////////////////////////////////////////// START - GENERATED VARIABLES, DO NOT EDIT! @@ -272,3 +273,5 @@ empty string authentication will be disabled. [id="plugins-{type}s-{plugin}-common-options"] include::{include_path}/{type}.asciidoc[] + +:default_codec!: \ No newline at end of file diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 5bd4fa0f..77625956 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.2.0' + s.version = '4.2.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 939f343a7d1dfd55a4313c2570d0f2d2f0940777 Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Fri, 27 Jul 2018 22:13:58 +0000 Subject: [PATCH 094/207] add managed slice scrolling with `slices` option. --- CHANGELOG.md | 3 + docs/index.asciidoc | 25 +++ lib/logstash/inputs/elasticsearch.rb | 44 ++++- spec/inputs/elasticsearch_spec.rb | 233 +++++++++++++++++++++++++++ 4 files changed, 301 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eed1c666..c5897079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## Unreleased + - Add support for managed multi-slice scrolling with `slices` option. + ## 4.2.1 - Docs: Set the default_codec doc attribute. diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 210e4267..76f23d4a 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -97,6 +97,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> |<>|No | <> |<>|No |======================================================================= @@ -250,6 +251,30 @@ round trip (i.e. between the previous scroll request, to the next). This allows you to set the maximum number of hits returned per scroll. +[id="plugins-{type}s-{plugin}-slices"] +===== `slices` + + * Value type is <> + * There is no default value. + * Sensible values range from 2 to about 8. 
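+
+A sketch of usage (the value `4` is purely illustrative; see the guidance
+below before choosing a number):
+
+[source,ruby]
+    input {
+      elasticsearch {
+        query => '{ "query": { "match_all": {} } }'
+        slices => 4
+      }
+    }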
+ +In some cases, it is possible to improve overall throughput by consuming multiple +distinct slices of a query simultaneously using the +https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#sliced-scroll[Sliced Scroll API], +especially if the pipeline is spending significant time waiting on Elasticsearch +to provide results. + +If set, the `slices` parameter tells the plugin how many slices to divide the work +into, and will produce events from the slices in parallel until all of them are done +scrolling. + +NOTE: The Elasticsearch manual indicates that there can be _negative_ performance + implications to both the query and the Elasticsearch cluster when a scrolling + query uses more slices than shards in the index. + +If the `slices` parameter is left unset, the plugin will _not_ inject slice +instructions into the query. + [id="plugins-{type}s-{plugin}-ssl"] ===== `ssl` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index bab4bd7d..a32ee5c9 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -1,6 +1,7 @@ # encoding: utf-8 require "logstash/inputs/base" require "logstash/namespace" +require "logstash/json" require "base64" # .Compatibility Note @@ -83,6 +84,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # round trip (i.e. between the previous scroll request, to the next). config :scroll, :validate => :string, :default => "1m" + # This parameter controls the number of parallel slices to be consumed simultaneously + # by this pipeline input. + config :slices, :validate => :number + # If set, include Elasticsearch document information such as index, type, and # the id in the event. # @@ -147,10 +152,14 @@ def register @options = { :index => @index, - :body => @query, :scroll => @scroll, :size => @size } + @base_query = LogStash::Json.load(@query) + if @slices + @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option") + @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`") + end transport_options = {} @@ -196,16 +205,39 @@ def stop private def do_run(output_queue) - # get first wave of data - r = @client.search(@options) + # if configured to run a single slice, don't bother spinning up threads + return do_run_slice(output_queue) if @slices.nil? || @slices <= 1 + + logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 + + @slices.times.map do |slice_id| + Thread.new do + LogStash::Util::set_thread_name("#{@id}_slice_#{slice_id}") + do_run_slice(output_queue, slice_id) + end + end.map(&:join) + end + + def do_run_slice(output_queue, slice_id=nil) + slice_query = @base_query + slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil? + + slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) ) + + logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil? + r = search_request(slice_options) r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } + logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil? + has_hits = r['hits']['hits'].any? - while has_hits && !stop? + while has_hits && r['_scroll_id'] && !stop? 
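+      # each iteration below drains one scroll page; the added check on
+      # r['_scroll_id'] ends the loop when a response carries no scroll id
+      # (as happens on the final page of a slice), instead of issuing a
+      # scroll request with a nil id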
r = process_next_scroll(output_queue, r['_scroll_id']) + logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil? has_hits = r['has_hits'] end + logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil? end def process_next_scroll(output_queue, scroll_id) @@ -243,4 +275,8 @@ def push_hit(hit, output_queue) def scroll_request scroll_id @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll) end + + def search_request(options) + @client.search(options) + end end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 3363925e..3a648b4f 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -84,6 +84,239 @@ insist { event.get("message") } == [ "ohayo" ] end + + # This spec is an adapter-spec, ensuring that we send the right sequence of messages to our Elasticsearch Client + # to support sliced scrolling. The underlying implementation will spawn its own threads to consume, so we must be + # careful to use thread-safe constructs. + context "with managed sliced scrolling" do + let(:config) do + { + 'query' => "#{LogStash::Json.dump(query)}", + 'slices' => slices, + 'docinfo' => true, # include ids + } + end + let(:query) do + { + "query" => { + "match" => { "city_name" => "Okinawa" } + }, + "fields" => ["message"] + } + end + let(:slices) { 2 } + + context 'with `slices => 0`' do + let(:slices) { 0 } + it 'fails to register' do + expect { plugin.register }.to raise_error(LogStash::ConfigurationError) + end + end + + context 'with `slices => 1`' do + let(:slices) { 1 } + it 'runs just one slice' do + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) + expect(Thread).to_not receive(:new) + + plugin.register + plugin.run([]) + end + end + + context 'without slices directive' do + let(:config) { super().except('slices') } + it 'runs just one slice' do + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) + expect(Thread).to_not receive(:new) + + plugin.register + plugin.run([]) + end + end + + 2.upto(8) do |slice_count| + context "with `slices => #{slice_count}`" do + let(:slices) { slice_count } + it "runs #{slice_count} independent slices" do + expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times + slice_count.times do |slice_id| + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), slice_id) + end + + plugin.register + plugin.run([]) + end + end + end + + # This section of specs heavily mocks the Elasticsearch::Client, and ensures that the Elasticsearch Input Plugin + # behaves as expected when handling a series of sliced, scrolled requests/responses. + context 'adapter/integration' do + let(:response_template) do + { + "took" => 12, + "timed_out" => false, + "shards" => { + "total" => 6, + "successful" => 6, + "failed" => 0 + } + } + end + + let(:hits_template) do + { + "total" => 4, + "max_score" => 1.0, + "hits" => [] + } + end + + let(:hit_template) do + { + "_index" => "logstash-2018.08.23", + "_type" => "logs", + "_score" => 1.0, + "_source" => { "message" => ["hello, world"] } + } + end + + # BEGIN SLICE 0: a sequence of THREE scrolled responses containing 2, 1, and 0 items + # end-of-slice is reached when slice0_response2 is empty. 
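+      # (the bare begin/end wrapper below exists only to group the slice-0
+      #  definitions visually; it has no effect on how RSpec evaluates them)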
+ begin + let(:slice0_response0) do + response_template.merge({ + "_scroll_id" => slice0_scroll1, + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice0-response0-item0"), + hit_template.merge('_id' => "slice0-response0-item1") + ]) + }) + end + let(:slice0_scroll1) { 'slice:0,scroll:1' } + let(:slice0_response1) do + response_template.merge({ + "_scroll_id" => slice0_scroll2, + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice0-response1-item0") + ]) + }) + end + let(:slice0_scroll2) { 'slice:0,scroll:2' } + let(:slice0_response2) do + response_template.merge( + "_scroll_id" => slice0_scroll3, + "hits" => hits_template.merge({"hits" => []}) + ) + end + let(:slice0_scroll3) { 'slice:0,scroll:3' } + end + # END SLICE 0 + + # BEGIN SLICE 1: a sequence of TWO scrolled responses containing 2 and 2 items. + # end-of-slice is reached when slice1_response1 does not contain a next scroll id + begin + let(:slice1_response0) do + response_template.merge({ + "_scroll_id" => slice1_scroll1, + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice1-response0-item0"), + hit_template.merge('_id' => "slice1-response0-item1") + ]) + }) + end + let(:slice1_scroll1) { 'slice:1,scroll:1' } + let(:slice1_response1) do + response_template.merge({ + "hits" => hits_template.merge("hits" => [ + hit_template.merge('_id' => "slice1-response1-item0"), + hit_template.merge('_id' => "slice1-response1-item1") + ]) + }) + end + end + # END SLICE 1 + + let(:client) { Elasticsearch::Client.new } + + # RSpec mocks validations are not threadsafe. + # Allow caller to synchronize. + def synchronize_method!(object, method_name) + original_method = object.method(method_name) + mutex = Mutex.new + allow(object).to receive(method_name).with(any_args) do |*method_args, &method_block| + mutex.synchronize do + original_method.call(*method_args,&method_block) + end + end + end + + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + plugin.register + + # SLICE0 is a three-page scroll in which the last page is empty + slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) + + # SLICE1 is a two-page scroll in which the last page has no next scroll id + slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) + + synchronize_method!(plugin, :scroll_request) + synchronize_method!(plugin, :search_request) + end + + let(:emitted_events) do + queue = Queue.new # since we are running slices in threads, we need a thread-safe queue. + plugin.run(queue) + events = [] + events << queue.pop until queue.empty? 
+      events
+    end
+
+    let(:emitted_event_ids) do
+      emitted_events.map { |event| event.get('[@metadata][_id]') }
+    end
+
+    it 'emits the hits on the first page of the first slice' do
+      expect(emitted_event_ids).to include('slice0-response0-item0')
+      expect(emitted_event_ids).to include('slice0-response0-item1')
+    end
+    it 'emits the hits on the second page of the first slice' do
+      expect(emitted_event_ids).to include('slice0-response1-item0')
+    end
+
+    it 'emits the hits on the first page of the second slice' do
+      expect(emitted_event_ids).to include('slice1-response0-item0')
+      expect(emitted_event_ids).to include('slice1-response0-item1')
+    end
+
+    it 'emits the hits on the second page of the second slice' do
+      expect(emitted_event_ids).to include('slice1-response1-item0')
+      expect(emitted_event_ids).to include('slice1-response1-item1')
+    end
+
+    it 'does not double-emit' do
+      expect(emitted_event_ids.uniq).to eq(emitted_event_ids)
+    end
+
+    it 'emits events with appropriate fields' do
+      emitted_events.each do |event|
+        expect(event).to be_a(LogStash::Event)
+        expect(event.get('message')).to eq(['hello, world'])
+        expect(event.get('[@metadata][_id]')).to_not be_nil
+        expect(event.get('[@metadata][_id]')).to_not be_empty
+        expect(event.get('[@metadata][_index]')).to start_with('logstash-')
+      end
+    end
+  end
+  end
+
   context "with Elasticsearch document information" do
     let!(:response) do
       {

From 762e540e3c8f4b5cbddcb1608cba84f1eab51b90 Mon Sep 17 00:00:00 2001
From: Rob Bavey
Date: Thu, 3 Jan 2019 15:26:11 -0500
Subject: [PATCH 095/207] pin bundler version to < 2

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index 1458a3b5..ea27df6b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,3 +16,4 @@ matrix:
 install: true
 script: ci/build.sh
 jdk: oraclejdk8
+before_install: gem install bundler -v '< 2'

From be3ea46e32e0138b8f1fadd854958a3c41fc3cb1 Mon Sep 17 00:00:00 2001
From: Rob Bavey
Date: Fri, 4 Jan 2019 11:27:37 -0500
Subject: [PATCH 096/207] [skip ci] Travis: update LOGSTASH_BRANCH from 6.[0..4] to 6.5

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ea27df6b..746f1452 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,7 @@ matrix:
     - rvm: jruby-9.1.13.0
       env: LOGSTASH_BRANCH=6.x
     - rvm: jruby-9.1.13.0
-      env: LOGSTASH_BRANCH=6.0
+      env: LOGSTASH_BRANCH=6.5
     - rvm: jruby-1.7.27
       env: LOGSTASH_BRANCH=5.6
   fast_finish: true

From 18444b58174b0c869de04ac5c3614fb684f47515 Mon Sep 17 00:00:00 2001
From: Joao Duarte
Date: Mon, 4 Feb 2019 17:06:50 +0000
Subject: [PATCH 097/207] bump to 4.3.0

---
 CHANGELOG.md                         | 4 ++--
 logstash-input-elasticsearch.gemspec | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5897079..bcd57a00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,5 @@
-## Unreleased
- - Add support for managed multi-slice scrolling with `slices` option.
+## 4.3.0
+ - Added managed slice scrolling with `slices` option
 
 ## 4.2.1
   - Docs: Set the default_codec doc attribute.
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 77625956..f412fbd6 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.2.1' + s.version = '4.3.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 0dc8635891a8bc6a02ac29d8d405c67a8bff37dd Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Wed, 13 Feb 2019 00:33:08 +0000 Subject: [PATCH 098/207] update matrix to include current targets [ci skip] --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 746f1452..dc962739 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,9 +7,11 @@ matrix: - rvm: jruby-9.1.13.0 env: LOGSTASH_BRANCH=master - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=6.x + env: LOGSTASH_BRANCH=7.0 - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=6.5 + env: LOGSTASH_BRANCH=6.7 + - rvm: jruby-9.1.13.0 + env: LOGSTASH_BRANCH=6.6 - rvm: jruby-1.7.27 env: LOGSTASH_BRANCH=5.6 fast_finish: true From dcc3107a84a6a0243cb7dfb94de97ff146a99884 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Tue, 30 Apr 2019 17:52:26 -0400 Subject: [PATCH 099/207] [Doc] Fix deeplink into es ref (#103) * Fix deeplink into es ref * Add PR number/link to changelog --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 9 ++++----- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcd57a00..1f31e68c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.3.1 + - Fixed deeplink to Elasticsearch Reference [#103](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/103) + ## 4.3.0 - Added managed slice scrolling with `slices` option diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 76f23d4a..9ad64039 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -163,11 +163,10 @@ It will be removed in the next major version of Logstash. * Value type is <> * Default value is `["_index", "_type", "_id"]` -If document metadata storage is requested by enabling the `docinfo` -option, this option lists the metadata fields to save in the current -event. See -http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html[Document Metadata] -in the Elasticsearch documentation for more information. +If document metadata storage is requested by enabling the `docinfo` option, this +option lists the metadata fields to save in the current event. See +{ref}/mapping-fields.html[Meta-Fields] in the Elasticsearch documentation for +more information. 
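+
+A minimal sketch (an illustrative subset of the default field list) that
+keeps only the index name and document id with each event:
+
+[source,ruby]
+    input {
+      elasticsearch {
+        docinfo => true
+        docinfo_fields => ["_index", "_id"]
+      }
+    }
+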
[id="plugins-{type}s-{plugin}-docinfo_target"] ===== `docinfo_target` diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index f412fbd6..6eb5fc2f 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.3.0' + s.version = '4.3.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From ea0fb4c644b5aa38b2fdc3cd69bbffbc8638904a Mon Sep 17 00:00:00 2001 From: Rob Bavey Date: Wed, 1 May 2019 14:30:25 -0400 Subject: [PATCH 100/207] Remove Hash#except from failing spec. Remove use of Hash#except, which was previously brought in via the 'i18n' gem, but, as of 7.0 no longer appears to be the case, and is causing test failures --- spec/inputs/elasticsearch_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 3a648b4f..8a077eb3 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -125,7 +125,7 @@ end context 'without slices directive' do - let(:config) { super().except('slices') } + let(:config) { super.tap { |h| h.delete('slices') } } it 'runs just one slice' do expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) expect(Thread).to_not receive(:new) From 94792daebd4572f373fbc026134e4cfc7cb3013c Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Wed, 31 Jul 2019 13:34:19 -0400 Subject: [PATCH 101/207] [Doc]Update link to sliced scrolls doc (#106) * Update link path to sliced scrolls * Added PR number to changelog link --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 2 +- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f31e68c..35f54e6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.3.2 + - Fixed broken link to Elasticsearch Reference [#106](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/106) + ## 4.3.1 - Fixed deeplink to Elasticsearch Reference [#103](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/103) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 9ad64039..896bd247 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -259,7 +259,7 @@ This allows you to set the maximum number of hits returned per scroll. In some cases, it is possible to improve overall throughput by consuming multiple distinct slices of a query simultaneously using the -https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#sliced-scroll[Sliced Scroll API], +https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#sliced-scroll[Sliced Scroll API], especially if the pipeline is spending significant time waiting on Elasticsearch to provide results. 
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 6eb5fc2f..33409145 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.3.1' + s.version = '4.3.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 98479d56afdc42135222860de3bed91aaed0428f Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Tue, 6 Aug 2019 14:22:23 -0400 Subject: [PATCH 102/207] [skip ci] Travis: switch to openjdk8 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index dc962739..3b612474 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,5 +17,5 @@ matrix: fast_finish: true install: true script: ci/build.sh -jdk: oraclejdk8 +jdk: openjdk8 before_install: gem install bundler -v '< 2' From 3085b1d4aeef342149b35e854480f685d5ccc42e Mon Sep 17 00:00:00 2001 From: Rob Bavey Date: Wed, 23 Oct 2019 11:10:05 -0400 Subject: [PATCH 103/207] Loosen checks against ES gem and add integration test and docker testing This commit removes the upper bound on the Elasticsearch runtime which was restricting the ability to upgrade the version of the Elasticsearch gem in Logstash, which is required to enable compatibility with later versions of Elasticsearch post type-removal Fixes #110 --- .travis.yml | 38 +++++----- ci/Dockerfile.elasticsearch | 10 +++ ci/Dockerfile.logstash | 20 +++++ ci/docker-compose.yml | 36 +++++++++ ci/docker-lsonly-compose.yml | 20 +++++ ci/docker-run.sh | 9 +++ ci/docker-setup.sh | 76 +++++++++++++++++++ ci/elasticsearch-run.sh | 4 + ci/logstash-run.sh | 26 +++++++ logstash-input-elasticsearch.gemspec | 2 +- spec/es_helper.rb | 40 ++++++++++ spec/inputs/elasticsearch_spec.rb | 42 ++++------ spec/inputs/integration/elasticsearch_spec.rb | 43 +++++++++++ 13 files changed, 318 insertions(+), 48 deletions(-) create mode 100644 ci/Dockerfile.elasticsearch create mode 100644 ci/Dockerfile.logstash create mode 100644 ci/docker-compose.yml create mode 100644 ci/docker-lsonly-compose.yml create mode 100755 ci/docker-run.sh create mode 100755 ci/docker-setup.sh create mode 100755 ci/elasticsearch-run.sh create mode 100755 ci/logstash-run.sh create mode 100644 spec/es_helper.rb create mode 100644 spec/inputs/integration/elasticsearch_spec.rb diff --git a/.travis.yml b/.travis.yml index 3b612474..d2925566 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,21 +1,19 @@ --- -sudo: false -language: ruby -cache: bundler -matrix: - include: - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=master - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=7.0 - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=6.7 - - rvm: jruby-9.1.13.0 - env: LOGSTASH_BRANCH=6.6 - - rvm: jruby-1.7.27 - env: LOGSTASH_BRANCH=5.6 - fast_finish: true -install: true -script: ci/build.sh -jdk: openjdk8 -before_install: gem install bundler -v '< 2' +sudo: required +services: docker +addons: + apt: + packages: + - docker-ce +env: + - INTEGRATION=false ELASTIC_STACK_VERSION=5.x + - INTEGRATION=false ELASTIC_STACK_VERSION=6.x + - INTEGRATION=false ELASTIC_STACK_VERSION=7.x + - INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true + - INTEGRATION=false ELASTIC_STACK_VERSION=8.x 
SNAPSHOT=true + - INTEGRATION=true ELASTIC_STACK_VERSION=6.x + - INTEGRATION=true ELASTIC_STACK_VERSION=7.x + - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true + - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true +install: ci/docker-setup.sh +script: ci/docker-run.sh \ No newline at end of file diff --git a/ci/Dockerfile.elasticsearch b/ci/Dockerfile.elasticsearch new file mode 100644 index 00000000..67627a39 --- /dev/null +++ b/ci/Dockerfile.elasticsearch @@ -0,0 +1,10 @@ +ARG elastic_stack_version +FROM docker.elastic.co/elasticsearch/elasticsearch$distribution_suffix:$elastic_stack_version + +ARG plugin_path=/usr/share/plugins/this +ARG es_path=/usr/share/elasticsearch +ARG es_yml=$es_path/config/elasticsearch.yml + +RUN rm -f $es_path/config/scripts + +COPY --chown=elasticsearch:elasticsearch ci/elasticsearch-run.sh $es_path/ diff --git a/ci/Dockerfile.logstash b/ci/Dockerfile.logstash new file mode 100644 index 00000000..943faf0a --- /dev/null +++ b/ci/Dockerfile.logstash @@ -0,0 +1,20 @@ +ARG elastic_stack_version +FROM docker.elastic.co/logstash/logstash$distribution_suffix:$elastic_stack_version +USER root +RUN yum install -y openssl +USER logstash +ENV JARS_SKIP="true" +ENV LOGSTASH_SOURCE=1 +ENV LS_JAVA_OPTS="-Xmx256m -Xms256m" +ENV PATH="${PATH}:/usr/share/logstash/vendor/jruby/bin" +COPY --chown=logstash:logstash Gemfile /usr/share/plugins/this/Gemfile +COPY --chown=logstash:logstash *.gemspec /usr/share/plugins/this/ +RUN cp /usr/share/logstash/logstash-core/versions-gem-copy.yml /usr/share/logstash/versions.yml +RUN gem install bundler -v '< 2' +WORKDIR /usr/share/plugins/this +RUN bundle install --path=vendor/bundler +COPY --chown=logstash:logstash . /usr/share/plugins/this +ARG elastic_stack_version +ARG integration +ENV INTEGRATION $integration +ENV ELASTIC_STACK_VERSION $elastic_stack_version diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml new file mode 100644 index 00000000..cd523f20 --- /dev/null +++ b/ci/docker-compose.yml @@ -0,0 +1,36 @@ +version: '3' + +# run tests: cd ci; docker-compose up --build --force-recreate +# manual: cd ci; docker-compose run logstash bash +services: + + logstash: + build: + context: ../ + dockerfile: ci/Dockerfile.logstash + args: + - elastic_stack_version=$ELASTIC_STACK_VERSION + - integration=${INTEGRATION:-false} + command: /usr/share/plugins/this/ci/logstash-run.sh + environment: + - SPEC_OPTS + tty: true + #volumes: + # - ./:/usr/share/plugins/this + + elasticsearch: + build: + context: ../ + dockerfile: ci/Dockerfile.elasticsearch + args: + - elastic_stack_version=$ELASTIC_STACK_VERSION + - integration=${INTEGRATION:-false} + command: /usr/share/elasticsearch/elasticsearch-run.sh + tty: true + ports: + - "9200:9200" + user: elasticsearch + + #volumes: + # - ./:/usr/share/plugins/this + diff --git a/ci/docker-lsonly-compose.yml b/ci/docker-lsonly-compose.yml new file mode 100644 index 00000000..fb38ffbe --- /dev/null +++ b/ci/docker-lsonly-compose.yml @@ -0,0 +1,20 @@ +version: '3' + +# run tests: cd ci; docker-compose up --build --force-recreate +# manual: cd ci; docker-compose run logstash bash +services: + + logstash: + build: + context: ../ + dockerfile: ci/Dockerfile.logstash + args: + - elastic_stack_version=$ELASTIC_STACK_VERSION + - integration=${INTEGRATION:-false} + command: /usr/share/plugins/this/ci/logstash-run.sh + environment: + - SPEC_OPTS + tty: true + #volumes: + # - ./:/usr/share/plugins/this + diff --git a/ci/docker-run.sh b/ci/docker-run.sh new file mode 100755 index 00000000..718f4972 
--- /dev/null
+++ b/ci/docker-run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# This is intended to be run on the host: it brings up the docker-compose services and runs the tests.
+set -ex
+if [ "$INTEGRATION" == "true" ]; then
+  docker-compose -f ci/docker-compose.yml up --exit-code-from logstash
+else
+  docker-compose -f ci/docker-lsonly-compose.yml up --exit-code-from logstash
+fi
diff --git a/ci/docker-setup.sh b/ci/docker-setup.sh
new file mode 100755
index 00000000..8ff8a400
--- /dev/null
+++ b/ci/docker-setup.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# This is intended to be run from the plugin's root directory: `ci/docker-setup.sh`
+# Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable.
+set -e
+
+VERSION_URL="https://raw.githubusercontent.com/elastic/logstash/master/ci/logstash_releases.json"
+
+if [ "$ELASTIC_STACK_VERSION" ]; then
+    echo "Fetching versions from $VERSION_URL"
+    VERSIONS=$(curl --silent $VERSION_URL)
+    if [[ "$SNAPSHOT" = "true" ]]; then
+      ELASTIC_STACK_RETRIEVED_VERSION=$(echo $VERSIONS | jq '.snapshots."'"$ELASTIC_STACK_VERSION"'"')
+      echo $ELASTIC_STACK_RETRIEVED_VERSION
+    else
+      ELASTIC_STACK_RETRIEVED_VERSION=$(echo $VERSIONS | jq '.releases."'"$ELASTIC_STACK_VERSION"'"')
+    fi
+    if [[ "$ELASTIC_STACK_RETRIEVED_VERSION" != "null" ]]; then
+      # remove starting and trailing double quotes
+      ELASTIC_STACK_RETRIEVED_VERSION="${ELASTIC_STACK_RETRIEVED_VERSION%\"}"
+      ELASTIC_STACK_RETRIEVED_VERSION="${ELASTIC_STACK_RETRIEVED_VERSION#\"}"
+      echo "Translated $ELASTIC_STACK_VERSION to ${ELASTIC_STACK_RETRIEVED_VERSION}"
+      export ELASTIC_STACK_VERSION=$ELASTIC_STACK_RETRIEVED_VERSION
+    fi
+
+    echo "Testing against version: $ELASTIC_STACK_VERSION"
+
+    if [[ "$ELASTIC_STACK_VERSION" = *"-SNAPSHOT" ]]; then
+        cd /tmp
+
+        jq=".build.projects.\"logstash\".packages.\"logstash-$ELASTIC_STACK_VERSION-docker-image.tar.gz\".url"
+        result=$(curl --silent https://artifacts-api.elastic.co/v1/versions/$ELASTIC_STACK_VERSION/builds/latest | jq -r $jq)
+        echo $result
+        curl $result > logstash-docker-image.tar.gz
+        tar xfvz logstash-docker-image.tar.gz repositories
+        echo "Loading docker image: "
+        cat repositories
+        docker load < logstash-docker-image.tar.gz
+        rm logstash-docker-image.tar.gz
+        cd -
+
+        if [ "$INTEGRATION" == "true" ]; then
+
+          cd /tmp
+
+          jq=".build.projects.\"elasticsearch\".packages.\"elasticsearch-$ELASTIC_STACK_VERSION-docker-image.tar.gz\".url"
+          result=$(curl --silent https://artifacts-api.elastic.co/v1/versions/$ELASTIC_STACK_VERSION/builds/latest | jq -r $jq)
+          echo $result
+          curl $result > elasticsearch-docker-image.tar.gz
+          tar xfvz elasticsearch-docker-image.tar.gz repositories
+          echo "Loading docker image: "
+          cat repositories
+          docker load < elasticsearch-docker-image.tar.gz
+          rm elasticsearch-docker-image.tar.gz
+          cd -
+
+        fi
+    fi
+
+    if [ -f Gemfile.lock ]; then
+        rm Gemfile.lock
+    fi
+
+    if [ "$INTEGRATION" == "true" ]; then
+        docker-compose -f ci/docker-compose.yml down
+        docker-compose -f ci/docker-compose.yml build
+    else
+        docker-compose -f ci/docker-lsonly-compose.yml down
+        docker-compose -f ci/docker-lsonly-compose.yml build
+    fi
+else
+    echo "Please set the ELASTIC_STACK_VERSION environment variable"
+    echo "For example: export ELASTIC_STACK_VERSION=6.2.4"
+    exit 1
+fi
+
diff --git a/ci/elasticsearch-run.sh b/ci/elasticsearch-run.sh
new file mode 100755
index 00000000..9df8c88d
--- /dev/null
+++ b/ci/elasticsearch-run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -ex
+
+/usr/share/elasticsearch/bin/elasticsearch -Ediscovery.type=single-node
diff --git a/ci/logstash-run.sh b/ci/logstash-run.sh
new file mode 100755
index 00000000..af0dfeaf
--- /dev/null
+++ b/ci/logstash-run.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -ex
+
+export PATH=$BUILD_DIR/gradle/bin:$PATH
+
+wait_for_es() {
+    echo "Waiting for elasticsearch to respond..."
+    es_url="http://elasticsearch:9200"
+    count=120
+    while ! curl --silent $es_url && [[ $count -ne 0 ]]; do
+        count=$(( $count - 1 ))
+        [[ $count -eq 0 ]] && return 1
+        sleep 1
+    done
+    echo "Elasticsearch is up!"
+
+    return 0
+}
+
+if [[ "$INTEGRATION" != "true" ]]; then
+  bundle exec rspec -fd spec/inputs -t ~integration
+else
+  extra_tag_args="--tag integration"
+  wait_for_es
+  bundle exec rspec -fd $extra_tag_args --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration
+fi
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 33409145..5a64f286 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -22,7 +22,7 @@ Gem::Specification.new do |s|
 
   # Gem dependencies
   s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
-  s.add_runtime_dependency 'elasticsearch', ['>= 5.0.3', '< 6.0.0']
+  s.add_runtime_dependency 'elasticsearch', '>= 5.0.3'
 
   s.add_runtime_dependency 'logstash-codec-json'
   s.add_runtime_dependency 'logstash-codec-plain'
diff --git a/spec/es_helper.rb b/spec/es_helper.rb
new file mode 100644
index 00000000..5c7055da
--- /dev/null
+++ b/spec/es_helper.rb
@@ -0,0 +1,40 @@
+module ESHelper
+  def self.get_host_port
+    return "http://elasticsearch:9200" if ENV["INTEGRATION"] == "true"
+    raise "This setting is only used for integration tests"
+  end
+
+  def self.get_client
+    Elasticsearch::Client.new(:hosts => [get_host_port])
+  end
+
+  def self.doc_type
+    if ESHelper.es_version_satisfies?(">=8")
+      nil
+    elsif ESHelper.es_version_satisfies?(">=7")
+      "_doc"
+    else
+      "doc"
+    end
+  end
+
+  def self.index_doc(es, params)
+    type = doc_type
+    params[:type] = doc_type unless type.nil?
+    es.index(params)
+  end
+
+  def self.es_version
+    ENV['ES_VERSION'] || ENV['ELASTIC_STACK_VERSION']
+  end
+
+  def self.es_version_satisfies?(*requirement)
+    es_version = RSpec.configuration.filter[:es_version] || ENV['ES_VERSION'] || ENV['ELASTIC_STACK_VERSION']
+    if es_version.nil?
+      puts "Info: ES_VERSION, ELASTIC_STACK_VERSION or 'es_version' tag wasn't set. Returning false to all `es_version_satisfies?` calls."
+      return false
+    end
+    es_release_version = Gem::Version.new(es_version).release
+    Gem::Requirement.new(requirement).satisfied_by?(es_release_version)
+  end
+end
\ No newline at end of file
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 8a077eb3..2528f284 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -375,15 +375,13 @@
     end
 
     it 'merges the values if the `docinfo_target` already exist in the `_source` document' do
-      metadata_field = 'metadata_with_hash'
-
       config_metadata_with_hash = %Q[
         input {
           elasticsearch {
             hosts => ["localhost"]
             query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
             docinfo => true
-            docinfo_target => '#{metadata_field}'
+            docinfo_target => 'metadata_with_hash'
           }
         }
      ]
@@ -392,33 +390,23 @@
         queue.pop
       end
 
-      expect(event.get("[#{metadata_field}][_index]")).to eq('logstash-2014.10.12')
-      expect(event.get("[#{metadata_field}][_type]")).to eq('logs')
-      expect(event.get("[#{metadata_field}][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ')
-      expect(event.get("[#{metadata_field}][awesome]")).to eq("logstash")
+      expect(event.get("[metadata_with_hash][_index]")).to eq('logstash-2014.10.12')
+      expect(event.get("[metadata_with_hash][_type]")).to eq('logs')
+      expect(event.get("[metadata_with_hash][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ')
+      expect(event.get("[metadata_with_hash][awesome]")).to eq("logstash")
     end
 
-    it 'thows an exception if the `docinfo_target` exist but is not of type hash' do
-      metadata_field = 'metadata_with_string'
-
-      config_metadata_with_string = %Q[
-        input {
-          elasticsearch {
-            hosts => ["localhost"]
-            query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }'
-            docinfo => true
-            docinfo_target => '#{metadata_field}'
-          }
-        }
-      ]
-
-      pipeline = new_pipeline_from_string(config_metadata_with_string)
-      queue = Queue.new
-      pipeline.instance_eval do
-        @output_func = lambda { |event| queue << event }
+    context 'if the `docinfo_target` exists but is not of type hash' do
+      let (:config) { {
+        "hosts" => ["localhost"],
+        "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }',
+        "docinfo" => true,
+        "docinfo_target" => 'metadata_with_string'
+      } }
+      it 'throws an exception if the `docinfo_target` exists but is not of type hash' do
+        plugin.register
+        expect { plugin.run([]) }.to raise_error(Exception, /incompatible event/)
       end
-
-      expect { pipeline.run }.to raise_error(Exception, /incompatible event/)
     end
 
     it "should move the document info to the @metadata field" do
diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb
new file mode 100644
index 00000000..516dd250
--- /dev/null
+++ b/spec/inputs/integration/elasticsearch_spec.rb
@@ -0,0 +1,43 @@
+# encoding: utf-8
+require "logstash/devutils/rspec/spec_helper"
+require "logstash/plugin"
+require "logstash/inputs/elasticsearch"
+require_relative "../../../spec/es_helper"
+
+describe LogStash::Inputs::Elasticsearch, :integration => true do
+
+  let(:config) { { 'hosts' => [ESHelper.get_host_port],
+                   'index' => 'logs',
+                   'query' => '{ "query": { "match": { "message": "Not found"} }}' } }
+  let(:plugin) { described_class.new(config) }
+  let(:event) { LogStash::Event.new({}) }
+
+  before(:each) do
+    @es = ESHelper.get_client
+    # Delete all templates first.
+    # Clean ES of data before we start.
+    @es.indices.delete_template(:name => "*")
+    # This can fail if there are no indexes, ignore failure.
+    @es.indices.delete(:index => "*") rescue nil
+    10.times do
+      ESHelper.index_doc(@es, :index => 'logs', :body => { :response => 404, :message=> 'Not Found'})
+    end
+    @es.indices.refresh
+    plugin.register
+  end
+
+  after(:each) do
+    @es.indices.delete_template(:name => "*")
+    @es.indices.delete(:index => "*") rescue nil
+  end
+
+  describe 'smoke test' do
+    it "should retrieve json event from elasticsearch" do
+      queue = []
+      plugin.run(queue)
+      event = queue.pop
+      expect(event).to be_a(LogStash::Event)
+      expect(event.get("response")).to eql(404)
+    end
+  end
+end

From cc0c847f68097ac6ba5638e09c63602bbe7701ac Mon Sep 17 00:00:00 2001
From: Rob Bavey
Date: Mon, 11 Nov 2019 11:08:55 -0500
Subject: [PATCH 104/207] Bump version and update changelog

Fixes #110
---
 CHANGELOG.md                         | 3 +++
 logstash-input-elasticsearch.gemspec | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 35f54e6b..36b460d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.3.3
+ - Loosen restrictions on Elasticsearch gem [#110](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/110)
+
 ## 4.3.2
 - Fixed broken link to Elasticsearch Reference [#106](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/106)
 
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 5a64f286..1e38dc79 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.3.2'
+  s.version = '4.3.3'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From c1223aa923c7d10d9bbd64cb44a8677f0a3f8a10 Mon Sep 17 00:00:00 2001 From: Rob Bavey Date: Fri, 6 Dec 2019 17:09:19 -0500 Subject: [PATCH 105/207] Switch to using manticore (#111) * Switch to using manticore * Add Secure integration tests * Changelog and version bump Remove beta qualifier from gemspec * Minor tweak to helper code * Pin Faraday to 0.15 to avoid breaking change 0.17 contains code that breaks with Logstash 5.x * Update lib/logstash/inputs/elasticsearch.rb Co-Authored-By: Colin Surprenant --- .travis.yml | 1 + CHANGELOG.md | 3 ++ ci/Dockerfile.elasticsearch | 9 ++++ ci/Dockerfile.logstash | 4 +- ci/docker-compose.yml | 2 + ci/logstash-run.sh | 11 +++- lib/logstash/inputs/elasticsearch.rb | 12 +++-- logstash-input-elasticsearch.gemspec | 4 +- spec/es_helper.rb | 25 +++++++-- spec/fixtures/test_certs/ca/ca.crt | 32 ++++++++++++ spec/fixtures/test_certs/ca/ca.key | 51 +++++++++++++++++++ spec/fixtures/test_certs/test.crt | 36 +++++++++++++ spec/fixtures/test_certs/test.key | 51 +++++++++++++++++++ spec/inputs/integration/elasticsearch_spec.rb | 28 ++++++++-- 14 files changed, 253 insertions(+), 16 deletions(-) create mode 100644 spec/fixtures/test_certs/ca/ca.crt create mode 100755 spec/fixtures/test_certs/ca/ca.key create mode 100644 spec/fixtures/test_certs/test.crt create mode 100644 spec/fixtures/test_certs/test.key diff --git a/.travis.yml b/.travis.yml index d2925566..55b79bf5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,5 +15,6 @@ env: - INTEGRATION=true ELASTIC_STACK_VERSION=7.x - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true + - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x install: ci/docker-setup.sh script: ci/docker-run.sh \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 36b460d7..3617f681 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.4.0 + - Changed Elasticsearch Client transport to use Manticore[#111](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/111) + ## 4.3.3 - Loosen restrictions on Elasticsearch gem [#110](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/110) diff --git a/ci/Dockerfile.elasticsearch b/ci/Dockerfile.elasticsearch index 67627a39..79c89ec2 100644 --- a/ci/Dockerfile.elasticsearch +++ b/ci/Dockerfile.elasticsearch @@ -4,7 +4,16 @@ FROM docker.elastic.co/elasticsearch/elasticsearch$distribution_suffix:$elastic_ ARG plugin_path=/usr/share/plugins/this ARG es_path=/usr/share/elasticsearch ARG es_yml=$es_path/config/elasticsearch.yml +ARG secure_integration RUN rm -f $es_path/config/scripts +COPY --chown=elasticsearch:elasticsearch spec/fixtures/test_certs/* $es_path/config/test_certs/ COPY --chown=elasticsearch:elasticsearch ci/elasticsearch-run.sh $es_path/ + +RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.enabled: $secure_integration" >> $es_yml; fi +RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/test.key" >> $es_yml; fi +RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/test.crt" >> $es_yml; fi +RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi +RUN if [ "$secure_integration" = "true" ] ; then $es_path/bin/elasticsearch-users useradd simpleuser -p abc123 
-r superuser; fi +RUN if [ "$secure_integration" = "true" ] ; then $es_path/bin/elasticsearch-users useradd 'f@ncyuser' -p 'ab%12#' -r superuser; fi diff --git a/ci/Dockerfile.logstash b/ci/Dockerfile.logstash index 943faf0a..4ff76b9e 100644 --- a/ci/Dockerfile.logstash +++ b/ci/Dockerfile.logstash @@ -1,5 +1,5 @@ ARG elastic_stack_version -FROM docker.elastic.co/logstash/logstash$distribution_suffix:$elastic_stack_version +FROM docker.elastic.co/logstash/logstash:$elastic_stack_version USER root RUN yum install -y openssl USER logstash @@ -16,5 +16,7 @@ RUN bundle install --path=vendor/bundler COPY --chown=logstash:logstash . /usr/share/plugins/this ARG elastic_stack_version ARG integration +ARG secure_integration ENV INTEGRATION $integration +ENV SECURE_INTEGRATION $secure_integration ENV ELASTIC_STACK_VERSION $elastic_stack_version diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index cd523f20..e3fc218d 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -11,6 +11,7 @@ services: args: - elastic_stack_version=$ELASTIC_STACK_VERSION - integration=${INTEGRATION:-false} + - secure_integration=${SECURE_INTEGRATION:-false} command: /usr/share/plugins/this/ci/logstash-run.sh environment: - SPEC_OPTS @@ -25,6 +26,7 @@ services: args: - elastic_stack_version=$ELASTIC_STACK_VERSION - integration=${INTEGRATION:-false} + - secure_integration=${SECURE_INTEGRATION:-false} command: /usr/share/elasticsearch/elasticsearch-run.sh tty: true ports: diff --git a/ci/logstash-run.sh b/ci/logstash-run.sh index af0dfeaf..d0fb854b 100755 --- a/ci/logstash-run.sh +++ b/ci/logstash-run.sh @@ -6,6 +6,9 @@ export PATH=$BUILD_DIR/gradle/bin:$PATH wait_for_es() { echo "Waiting for elasticsearch to respond..." es_url="http://elasticsearch:9200/" + if [[ "$SECURE_INTEGRATION" == "true" ]]; then + es_url="https://elasticsearch:9200 -k" + fi count=120 while ! 
curl --silent $es_url && [[ $count -ne 0 ]]; do count=$(( $count - 1 )) @@ -18,9 +21,13 @@ wait_for_es() { } if [[ "$INTEGRATION" != "true" ]]; then - bundle exec rspec -fd spec/inputs -t ~integration + bundle exec rspec -fd spec/inputs -t ~integration -t ~secure_integration else - extra_tag_args="--tag integration" + if [[ "$SECURE_INTEGRATION" == "true" ]]; then + extra_tag_args="--tag secure_integration" + else + extra_tag_args="--tag ~secure_integration --tag integration" + fi wait_for_es bundle exec rspec -fd $extra_tag_args --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration fi diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index a32ee5c9..bbdea5b9 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -4,6 +4,7 @@ require "logstash/json" require "base64" + # .Compatibility Note # [NOTE] # ================================================================================ @@ -149,6 +150,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base def register require "elasticsearch" require "rufus/scheduler" + require "elasticsearch/transport/transport/http/manticore" @options = { :index => @index, @@ -176,12 +178,12 @@ def register else @hosts end + ssl_options = { :ssl => true, :ca_file => @ca_file } if @ssl && @ca_file + ssl_options ||= {} - if @ssl && @ca_file - transport_options[:ssl] = { :ca_file => @ca_file } - end - - @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options) + @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options, + :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore, + :ssl => ssl_options) end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 1e38dc79..312361c4 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.3.3' + s.version = '4.4.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -30,6 +30,8 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' s.add_runtime_dependency 'rufus-scheduler' + s.add_runtime_dependency 'manticore', "~> 0.6" + s.add_runtime_dependency 'faraday', "~> 0.15.4" s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'timecop' diff --git a/spec/es_helper.rb b/spec/es_helper.rb index 5c7055da..9d3e81ef 100644 --- a/spec/es_helper.rb +++ b/spec/es_helper.rb @@ -1,11 +1,30 @@ module ESHelper def self.get_host_port - return "http://elasticsearch:9200/" if ENV["INTEGRATION"] == "true" + return "elasticsearch:9200" if ENV["INTEGRATION"] == "true" || ENV["SECURE_INTEGRATION"] == "true" raise "This setting is only used for integration tests" end - def self.get_client - Elasticsearch::Client.new(:hosts => [get_host_port]) + def self.get_client(options = {}) + ssl_options = {} + hosts = [get_host_port] + + if options[:ca_file] + ssl_options = { :ssl => true, :ca_file => options[:ca_file] } + hosts.map! 
do |h| + host, port = h.split(":") + { :host => host, :scheme => 'https', :port => port } + end + end + + transport_options = {} + + if options[:user] && options[:password] + token = Base64.strict_encode64("#{options[:user]}:#{options[:password]}") + transport_options[:headers] = { :Authorization => "Basic #{token}" } + end + + @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options, :ssl => ssl_options, + :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore) end def self.doc_type diff --git a/spec/fixtures/test_certs/ca/ca.crt b/spec/fixtures/test_certs/ca/ca.crt new file mode 100644 index 00000000..d3a1abb6 --- /dev/null +++ b/spec/fixtures/test_certs/ca/ca.crt @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFeTCCA2GgAwIBAgIUU+VHJ91JsLLA1GJYC+UchNfw3hEwDQYJKoZIhvcNAQEL +BQAwTDELMAkGA1UEBhMCUFQxCzAJBgNVBAgMAk5BMQ8wDQYDVQQHDAZMaXNib24x +DjAMBgNVBAoMBU15TGFiMQ8wDQYDVQQDDAZSb290Q0EwHhcNMTkwNzE1MTMxMTI5 +WhcNMjQwNzE0MTMxMTI5WjBMMQswCQYDVQQGEwJQVDELMAkGA1UECAwCTkExDzAN +BgNVBAcMBkxpc2JvbjEOMAwGA1UECgwFTXlMYWIxDzANBgNVBAMMBlJvb3RDQTCC +AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMtTMqAWuH17b9XqPa5L3HNq +gnZ958+gvcOt7Q/sOEvcDQJgkzZ+Gywh5er5JF2iomYOHiD5JncYr4YmRQKuYfD6 +B1WI5FuQthD/OlA1/RHqtbY27J33SaO66ro8gal7vjHrXKQkefVYRwdfO6DqqbhV +6L4sMiy8FzQ55TMpoM35cWuvoAMxvSQqGZ4pYYKnfNSGhzHvssfNS1xu/Lwb7Vju +4jPhp+43BkGwEimI5km7jNC1nwjiHtxDsY/s93AKa/vLktXKUK5nA3jjJOhAbRTV +nbOAgxFt0YbX98xW/aUqscgBUVs9J/MyTRMwVKJ7Vsmth1PdJQksUASuzESlSPl0 +9dMjTQ+MXzJDt0JvX8SIJPmbBng78MSaCUhpOZiii1l2mBfPWejx20I/SMCUNmzb +wm2w9JD50Jv2iX4l4ge4H1CIK1/orW1pdY9xPL0uKYm6ADsDC0B8sGgNMBXeB6aL +ojY1/ITwmmfpfk9c/yWPfC7stHgCYRAv5MfGAsmv0/ya5VrWQGBJkFiYy1pon6nx +UjCbgn0RABojRoGdhhY3QDipgwmSgFZxr064RFr1bt/Ml3MJmPf535mSwPdk/j/z +w4IZTvlmwKW3FyMDhwYL/zX7J0c6MzMPLEdi73Qjzmr3ENIrir4O86wNz81YRfYk +g9ZX8yKJK9LBAUrYCjJ3AgMBAAGjUzBRMB0GA1UdDgQWBBShWnSceOrqYn9Qa4WG +dIrvKNs/KzAfBgNVHSMEGDAWgBShWnSceOrqYn9Qa4WGdIrvKNs/KzAPBgNVHRMB +Af8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQBRQK0m3t5h2Y3CUCJYLMiCUge4 +UOzvpCoawSXH1FP2ycA+P1bP8H8htjwvV334ZADlQrDQRu0hqa1T+DxwhLxNOxgE +1XCthN3TTyd3O1mT4NmT6mcn2wYSn/JC6fPwFcloX8BcUvxl+xwmOgL/pzgf1ekK +MVS0n+r3bzdFTgGnvsmxmPHe2bUhyXXqzQIx3ObSGtuKYUu7aZEysEtJhaR+vGTd +jjTOV2S71edVlKTxRLZpHgoTZpBL/phwRQ63vdef4ftNGs0glGDc0yqXGMxMALOl +Up7+H4HI99rldZcul6oZ+ORltt047Hk7ctWb20SqxEH9tGLXKm6hDEL9HzyFXeyJ +DAue1GF+3H0KvsjSs5XH7LHMuJDCuSP64+h9gzkI+q06oBNX/9pQyQaHj0K4don8 +lWOMLI4gQibV7R1Opt2feA8MwWxouP/yni8IX6sPePVQ+fLEk1C+Kg+x6k1yQHEM +36BEP6iYOYvqG0OIjMas2U7Yhn2wWlVm9It3WMyaW8ZPI8kwc3dx715dZuNg/zjd +rJS678BNBVxInc7dzpY6el0Lr70CGwiJpX/N9P1yiTFZ7GZm3Kax8QnTtvqXzRIy +sBgt8BVZHUe1lWFYlG+jlakiXqz752nmHuwif7iBI4iWzRmW2vYPfTEmYPRLZES2 +nIg9fQPvVw+fIHACZQ== +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/ca/ca.key b/spec/fixtures/test_certs/ca/ca.key new file mode 100755 index 00000000..e39ef42a --- /dev/null +++ b/spec/fixtures/test_certs/ca/ca.key @@ -0,0 +1,51 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIJKAIBAAKCAgEAy1MyoBa4fXtv1eo9rkvcc2qCdn3nz6C9w63tD+w4S9wNAmCT +Nn4bLCHl6vkkXaKiZg4eIPkmdxivhiZFAq5h8PoHVYjkW5C2EP86UDX9Eeq1tjbs +nfdJo7rqujyBqXu+MetcpCR59VhHB187oOqpuFXoviwyLLwXNDnlMymgzflxa6+g +AzG9JCoZnilhgqd81IaHMe+yx81LXG78vBvtWO7iM+Gn7jcGQbASKYjmSbuM0LWf +COIe3EOxj+z3cApr+8uS1cpQrmcDeOMk6EBtFNWds4CDEW3Rhtf3zFb9pSqxyAFR +Wz0n8zJNEzBUontWya2HU90lCSxQBK7MRKVI+XT10yNND4xfMkO3Qm9fxIgk+ZsG +eDvwxJoJSGk5mKKLWXaYF89Z6PHbQj9IwJQ2bNvCbbD0kPnQm/aJfiXiB7gfUIgr +X+itbWl1j3E8vS4piboAOwMLQHywaA0wFd4HpouiNjX8hPCaZ+l+T1z/JY98Luy0 
+eAJhEC/kx8YCya/T/JrlWtZAYEmQWJjLWmifqfFSMJuCfREAGiNGgZ2GFjdAOKmD +CZKAVnGvTrhEWvVu38yXcwmY9/nfmZLA92T+P/PDghlO+WbApbcXIwOHBgv/Nfsn +RzozMw8sR2LvdCPOavcQ0iuKvg7zrA3PzVhF9iSD1lfzIokr0sEBStgKMncCAwEA +AQKCAgBVVGU6qk5i2xrkO5oHO+8YwOpfsBdJG7yIEsYamJhHveH3zW/6vpHIw7Eq +G8UXRtnA2svqKqXp9YI0Wns71NNlvoi1bO3pP6IpH/PpFb9PdaEtB3/mC5HsFNXN +svb3mecILC6E9In6XUHx5hWwQstXgTZcGVA1VfqnAGUgQ6goyTbAasRMkuM9+i0m +I1e47XGF/69dVatCDvZBpJKMn2vMlvR3sYw4fP8zMiFtLPb4mq1OKerEX6Fz7zTl +oh119+m5koXdEzso9jKO2UTz85XT2JKGcriO5/e3D4v/RcLNPk2+Ek+CavgJKGMQ +WogqaHjTyu+wUm7omqA6VuGDLZqh1r0xYR+EXVMAudLjy7/NtAaE4MVOqVRs4WVd +sGccyirkTosxlvK3/vTfsp0VQtreBbxO1maqR5od0aa36MrP4Sk5O07yB9GAthp8 +5qlqtiYaO2Hcq2KJjKPUGwXlAWFZtENQe+G/jy+gYVDwKRInK7f7HubZlAMwsq2S +LSjtgvhqayAMsa7HoeevSVPLVdFb1IVkIw2jgMhXRgxmKa8WzbAUs124f9ey9z81 +si7w+qpZHq9LGChBjweTbd0abCianyRGHZIlDBE43XEcs3easxuHM6eOoJz0B7aj +oCXBCo/6Zd0om4ll5jva2+VOH2wTkZk7OhPiGU2f4g7kTJNAAQKCAQEA7YT3UdjN +HybAD3c/a5Kh17R4zCvymQkUWBs80ZaV9LlCYgie6aWlY6e+9m6vpDhY8dJZd+Zm +hlAF3VitRLw3SQUEImEC1caS1q99o1eQxMGu+mk9OiibF9PzZwoPE6zt5EZ0I/Ha +ifmf0Jn3xLyWF4qOKvO3gbWLknirDKffzNnWtFr4SQlEwtOR4m7IFDEz7e7RoGlv +K1qEFyK1PCfR8GeVHXWkJ3udHJRIZlEtNNLkHzU4nCRRYTvQ4l67rD9Tj7CoLbH1 +2OGSSvAkg+1lTBBs6RXZvhVvLrJVtQTXR7Oi8Z3mi3iJu9oWYa/OFaJl4lAN9xTe +QY0u0J1+AS5qAQKCAQEA2yUhO3rC1A7qHgbY4dAsx8f8yy9D0yCrI9OLnPQNF3ws +4mC1fUS60+07u0FYkgU3zIDwdLj5nsxWjB4ciY4hCgwz7pNJWlowurrfTvQNlqvC +m+Jrt1HYoaV+73mSj+rGv89HXWBW2I/1ED37BRoNB/YIMd/MUL8h0ubt3LIVaRow +41DT3dM969zuw3Avpx1uXQdnijJ1kA3oHpJ756YLHri/Nv6K0hJmGAbMrHPRxuhY +hYrxPJPVlp5mWUIjNkKoaNl3du3a6iVSbf7W15LxhAHmkKozxnhqoMldI6C8R548 +IKGyW4wo3GQvcEGPhgGnz2lswmvtx/6cWMv81b7sdwKCAQAXiC3sqPshk/hBUAIz +iTKJqXKyX8RITzL5y7EJ3s56kYQ3LD16TpQFPJBZ3/t83KxLQRjmHSiZNAJwvKFm +BvO/Q0T2lv/t6B+SL47WCQ3pwHqyioyrX2yGPtSFp+R4gZCMJkLJcOPC+b1QsIBw +uHJyYKLXNJBAxJjQaS4hMdylcguokL66lbV/S/DPK0SdY4aOkzOnneqKtAwUMrcb +/6H4HHsUkRwzYTbepv5JTM+axS4evWofZiW96Ww3kUUsupVvzgPLiy8dTrPswsAL +ZhC8KYBw015gS8VZLgf5yEH/85c4MvmtZcLXnrLK+N0FHbLUajQH/8RJYFB8EK50 +NYIBAoIBAQCNO8/AIqz/uCEAew858U168BOm62492lcRvtvCqrLpSNkwiH1PH4V8 +4e7WDxZC/WPpw8u0niYaRr0cjqd7q4L1k8nAX2It/eRb4+XZX4aGbWn9xx3+xpvk +CeHV+rcPU5MFuVPYBSfTaGvbLObjcdemItVDN2XZQGVPJA92ZbtQwlVxmv0AgIzu +vrOOd3XusRQYlpYPRdfooF3RnjyGncea5BwwGDpliwALSg6MshQnqkSqxFIW5XwJ +F0sFCY/h/3HEKStKFZ85uhX21/+kbYqDtinfYCIALkkiGMSTGptdWMiNi0iEob8P +0u2T3lzeU6DQFrTGVIKpmxkuTUFEjEifAoIBAH4nyu4ei4u7sGUhSZ79egUbqBZR +pjYblM8NB5UOAVmmbaswCWozsnsaBdIgymeokQXDPqIOwadP8IgGrgIxS5phdgvE +CNepxQDoVTXYEecPjc0LL4Kb+urmJL7HEP9BteIkc+7l8b9USDhNlJeCXICoJKBu +bNxgm60ZuoObx7h5APq9wC4x8Xj7AxQKu57Ied/tRFPCHW4UbhZhJfrnS2xTTk0u +z71AS7akI/NPfm3nLviISZeDzTgYs6vLYr/j4JUlcw1z6UpX4DvNm/MULi7ItXP5 +yV2H8jpCdjAe+OoC3OHTuQ8FQR29y758HUY7iF8ruuqUSWxF7pfL/a27EMw= +-----END RSA PRIVATE KEY----- diff --git a/spec/fixtures/test_certs/test.crt b/spec/fixtures/test_certs/test.crt new file mode 100644 index 00000000..11d4a238 --- /dev/null +++ b/spec/fixtures/test_certs/test.crt @@ -0,0 +1,36 @@ +-----BEGIN CERTIFICATE----- +MIIGQjCCBCqgAwIBAgIBAzANBgkqhkiG9w0BAQsFADBMMQswCQYDVQQGEwJQVDEL +MAkGA1UECAwCTkExDzANBgNVBAcMBkxpc2JvbjEOMAwGA1UECgwFTXlMYWIxDzAN +BgNVBAMMBlJvb3RDQTAeFw0xOTA3MTUxMzEzMDVaFw0yMjA0MTAxMzEzMDVaMFMx +CzAJBgNVBAYTAlBUMQswCQYDVQQIDAJOQTEPMA0GA1UEBwwGTGlzYm9uMQ4wDAYD +VQQKDAVNeUxhYjEWMBQGA1UEAwwNZWxhc3RpY3NlYXJjaDCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBAMYhP2zPOE3ke9naeK+cIPNV91htuoGGARs+mlY/ +IVxXSvau2ZZ94rkQR2xNL8TLijBNx46mU+kCniy8X5r+LX9seGqdBhhTh/tCJzh8 +MCzMt2JIijSjVyw28iiCb8/669LMTp5lFlRKajj11jlIpIm3o+OHqUzYwcSOw8og 
+p0A3nvAQ33Srghm/oAcT2umGrFyYXWT6PnGaEJRLUQn7LuHJnRLseCF2Cn/RzFK7 +/tiVVjImmQiVB3dE9fMR/pVJiO2v0COnWuG+/brXWrQIHk0AuD8pHc6Iw9iZODkc +Ao53B41qbvqcbdXFN5XfL4tb+lkBuLioCX7j9zR44awvuj9hKfuqFOFTUBZL2RjV +bFMKspGHnytQZF+a+mc5H33G9HiPP3jZE2JjrWlOay+j6ImylMgjcZmHAgaUe3ET +1GfnSVZBwO4MMd85taHNvitLnkEREjANSoPUuAJF3SKRHE9K8jUAzhyXflvgNNoM +tyczoQ5/L5BNiyA2h+1TU8jWicNDtl1+CtOsgEVBBHA6p/IHhsHbNZWPrYtIO9mh +hiJw1R5yrITXnjZY0rObITwyt/e6Sc3YnoQfsSGaLJEG0aDc0RALAhgzj+RY8086 +2RKOyfdw1sw1RmJKdCf+dOzhPyDpvauvCxrL8UZQTzcBs+qpxOWnZFRWeNsLwoDn +6JXXAgMBAAGjggEmMIIBIjAJBgNVHRMEAjAAMBEGCWCGSAGG+EIBAQQEAwIGQDAz +BglghkgBhvhCAQ0EJhYkT3BlblNTTCBHZW5lcmF0ZWQgU2VydmVyIENlcnRpZmlj +YXRlMB0GA1UdDgQWBBRvvz0yGw6Tz2UxbBLAGyzVMtcMUDCBiAYDVR0jBIGAMH6A +FKFadJx46upif1BrhYZ0iu8o2z8roVCkTjBMMQswCQYDVQQGEwJQVDELMAkGA1UE +CAwCTkExDzANBgNVBAcMBkxpc2JvbjEOMAwGA1UECgwFTXlMYWIxDzANBgNVBAMM +BlJvb3RDQYIUU+VHJ91JsLLA1GJYC+UchNfw3hEwDgYDVR0PAQH/BAQDAgWgMBMG +A1UdJQQMMAoGCCsGAQUFBwMBMA0GCSqGSIb3DQEBCwUAA4ICAQCaABHQxm6mtrM9 +f7kbgzuhEc47Q+bgrbjxeoIVOeO2Zshdw0SZlfkWvWe0622WSeWMsTBJ3hoaQwZe +9FUf1lnsWe6u6oOckiG9OjE0TyXJ7+eghdL1HPeXgJ+4ihwJsRtkNEljWf4HS7/n +y5LaFhcXdn2ZdbUKJ7z7zXqzh2Cp8VUBtsR+/IdiLjSN81dQou77/a2M/a/7BI2Z +HhUlUx1T7jHzNllJBRF3IaOk72yjoU4cL0qVy9874SXPwdpeFHtvS4TdQTLqnAGR +liHJcB1ZNz1sVOXndw3Wbvv6iB5y+IX/Y/kRSHS6zpZGdAb7ar/Vgl+Uvs3fKi44 +y9hq2b49bYlcSQMtmlimCBDiu82z0aYtVFLalZ2L/W7CMaeE3jpyzu/bbygRv/Bp +lKSaUtaFIVgiuRBPwIBDMyai3CJ5L+dJrJPU2JzzQvtJGFQCFCIHd9rqweubZB6V +re5cUn4dxlxA5SkZ0amFFV5DpP0YhThA/gq0t/NeWRmCEEBWNXZaqFmDhiYS5mnu +Z+NUtv8E332S46RdfneHe961SlMXEFC96I+1HOjXHdXlqKfOU8Qvy8VzsnpjuNE5 +VTrvnAM1L3LwqtYQYfUWUHYZFYdvh8layA2ImNE7yx/9wIIkw/L1j9m71Upi6WKR +FKbYFqzgpWksa+zZ2RYYplUAxq0wYw== +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/test.key b/spec/fixtures/test_certs/test.key new file mode 100644 index 00000000..769761c7 --- /dev/null +++ b/spec/fixtures/test_certs/test.key @@ -0,0 +1,51 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIJKQIBAAKCAgEAxiE/bM84TeR72dp4r5wg81X3WG26gYYBGz6aVj8hXFdK9q7Z +ln3iuRBHbE0vxMuKME3HjqZT6QKeLLxfmv4tf2x4ap0GGFOH+0InOHwwLMy3YkiK +NKNXLDbyKIJvz/rr0sxOnmUWVEpqOPXWOUikibej44epTNjBxI7DyiCnQDee8BDf +dKuCGb+gBxPa6YasXJhdZPo+cZoQlEtRCfsu4cmdEux4IXYKf9HMUrv+2JVWMiaZ +CJUHd0T18xH+lUmI7a/QI6da4b79utdatAgeTQC4PykdzojD2Jk4ORwCjncHjWpu ++pxt1cU3ld8vi1v6WQG4uKgJfuP3NHjhrC+6P2Ep+6oU4VNQFkvZGNVsUwqykYef +K1BkX5r6Zzkffcb0eI8/eNkTYmOtaU5rL6PoibKUyCNxmYcCBpR7cRPUZ+dJVkHA +7gwx3zm1oc2+K0ueQRESMA1Kg9S4AkXdIpEcT0ryNQDOHJd+W+A02gy3JzOhDn8v +kE2LIDaH7VNTyNaJw0O2XX4K06yARUEEcDqn8geGwds1lY+ti0g72aGGInDVHnKs +hNeeNljSs5shPDK397pJzdiehB+xIZoskQbRoNzREAsCGDOP5FjzTzrZEo7J93DW +zDVGYkp0J/507OE/IOm9q68LGsvxRlBPNwGz6qnE5adkVFZ42wvCgOfoldcCAwEA +AQKCAgA1FkOATCWx+T6WKMudgh/yE16q+vu2KMmzGxsPcOrnaxxS7JawlBpjq9D3 +W9coy8DDIJQPzNE+5cyr/+0+Akz+j3nUVy6C5h7RW/BWWjAuUMvyMa2WXQ3GcxJ/ +eDOtbnYxjTyjhEJvY2EC0hwMTUKJBAONu5PJW2rP19DuH8Gwmzai7GJzSGEbtRST +0OYfHE6ioNCldce1eKpokaWtHvh41ySXJXUqwg4eIYC1ylmGfr0RwvXOLuBJPNkJ +wBCOv51I0oragsT/J8Wkgn9zLZmw2DiF8+ZgqJSRPLyr0K1+rrX/Vj1WOQPU+3rh +VWPP211A7A0qrRuePEbIcHtHP6KPUCepABL44K33zyyOydmnJ7vg3dsW7AN7+Y6O +H4B24d1ogn4TJwzpZCfRvqJJVu2wsnzleng9PcpXyHhldB6S9h2fPpNqDUBvfxMv +w/fGZ2ZpOeUKRfQ7VVR3XIWwFq/eDhzLicHipaoM+6gKeOZdJPAc0Ew5jvOXQSBD +CYCM12a8gnEYd55NLo/fF3wX6Wdq/X6EbWW97gwtmmEqnhcZMxLdeMuKyli22JyX +Ik7QIDsmPSWhCkI2JvQ+CAZZp6oMEKuSb7UqqfACQreIuxCUmPTZq/pAEUGSCZGP +wnWqOk5jwxJ4d5TQm7g2RgPC6lTd7as1m4+JB8H1cNVpS2d0AQKCAQEA5tL9WjIK +u6x1h4a4KPmOQ9B34GxmGM+P9/bQkkJsWYf5eG1MlYPAigAiN0PMYPHDweeROsOO +rvmJiWXtmPMVFFSalHVotN6LMj400HhHObg7dADDYTBBGX6QuljxBX9jgUiKSOKO 
+66ngXEyRdUmTAbral3UuSCFGcqG8Khd3taULO2q5JQLEioFT+Lr0UzHywVSJq06O +k37aC3zpiQj4S/bJG4GOadYDIENq+gRCIU7Hn4pS1qtxLYMyhtNDzK0pVriYNj9T +ydHUxSKZO6ogM4423wVKnKOa2Cj4rgKBDHGpJJ9R0ZhrTbAQOa8LgDy1P6aMlvH/ +t9bG/HClmuHrFwKCAQEA271gZpIeCFG/XYIbxO8Uh9wG2cHrt7idw0iFTW4PpOBk +CGExq7WlkR29BkzxTDEXz5bSgDa8Q83cDf9EjSHVJYVGSYnodz7ZV8jZd2PUdCV1 +dL1kHV7vqz/KKxuCp7icCwzG9rQ1CjsTv8gBM3cN6DrZgw/2F+HjQpCkeyxc6KBR +Q+167yaLvOv3W0BHdSywtiNDU48oSSBpEK2anh7ShjG3BaEr/gAqGsTvxjsl7zDg +6MZFegeYPFIEH4ftvLZugPgd3NBg00CfsNRkjVWsH51y4gBy2ZL8d31Q2D2FI94s +he57Trvs8t8Y9QHGTMCuUk9IwRBpgapmW+c6G65jQQKCAQB0IPquAjc8LIwE31aP +5t4YaC2nj2HeYa8BBf/xVdHFSY/Ncs/w+RRJWb/dJhMRQxFF4QdEFVcX2RvFMK7V +IJceX2JWBqvjM4O8h2dy6tCKzZG7zxZ9MxXNicuutUith2W8iY5pFPLqxdDHHw6f +f6CiYivPv3jFeVhEG/LbCmuDy8FW5162rCnNtTtWDFkX8i077xhEQ4Wf11ZEKVgl +RYoGTeboG8pWnQF9ne2YU8Qhlc0BC0qaDi8mwrcM9gVKWGRP6RdLU5kIFLWDaODH +D9Sbm5UnpzXekME6t4JFqaTbaeO7NRyo4pI5x7aiDtsubVyS5WweFSqeh0QdhV8M +CVWJAoIBAQCJ7OSFfVG8hxNG7lPf2PFaFZF3PXFncEoWOX7yixLmurIPUHKNBQdX +fwMW4VTULjxz4IdgUvr41E47uu6cZ5ASbQUhQ57WumxR3ZAikFbaUGjZTcd5aA2n +v/J1/F6WSBoqFWsNjy97rHlI95FJbIEeM1I0IeCmPPMY5RFY/w+SNI7NxFJVqiwr ++TDZ5g70TYjdymSIHmN7AwzvmwhiF5atBKeMsQ2b8R58jwCxvI6jBFsnwMv7PKkh +s5lC8V8YBKp36UVVRLaB4x5ZL/etfwj7Dyj9EqsERm6R0ebc1ECtstbfekGLugmQ +qNhRcTu3EXpZz8oq5NJUwVBef1TJ6zwBAoIBAQC7Oq5AdmLzYOX0AYUuT3Cjgl0u +3Tq1c1uqlVaxQGjA3oqua4SR0+kvmRZbZGLpLAVnnwOjUEfUFBBYgP/5Mo/OiCkQ +C8eWkSQKXy6OFy5wh4mbL5oJttKjM4ZoB0gpF31+tGOmrfJwacqEPnyZLKzkgBdG +djVVKP+HH4XUdB2VXst8tvcif+VTrUsD1nuhGMOgbiHZkdx3eug05wfhnYvWljA+ +r/4xcq7TmKSoJqkb0OcOkhqjeZWleA6xRtEvHPGRzbEM67FVsVTxr/N9BX5tS9zu +YLCNI3tTNsDV0Ac+4rl46rghQ/n2TNSEpwvA/pjytsdPXLOpoapWirrsEiXf +-----END RSA PRIVATE KEY----- diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index 516dd250..107c24d7 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -4,16 +4,17 @@ require "logstash/inputs/elasticsearch" require_relative "../../../spec/es_helper" -describe LogStash::Inputs::Elasticsearch, :integration => true do +describe LogStash::Inputs::Elasticsearch do let(:config) { { 'hosts' => [ESHelper.get_host_port], 'index' => 'logs', 'query' => '{ "query": { "match": { "message": "Not found"} }}' } } let(:plugin) { described_class.new(config) } let(:event) { LogStash::Event.new({}) } + let(:client_options) {{}} before(:each) do - @es = ESHelper.get_client + @es = ESHelper.get_client(client_options) # Delete all templates first. # Clean ES of data before we start. 
@es.indices.delete_template(:name => "*")
@@ -31,8 +32,8 @@
 @es.indices.delete(:index => "*") rescue nil
 end
- describe 'smoke test' do
- it "should retrieve json event from elasticseach" do
+ shared_examples 'an elasticsearch index plugin' do
+ it 'should retrieve json event from elasticsearch' do
 queue = []
 plugin.run(queue)
 event = queue.pop
@@ -40,4 +41,23 @@
 expect(event.get("response")).to eql(404)
 end
 end
+
+ describe 'against an unsecured elasticsearch', :integration => true do
+ it_behaves_like 'an elasticsearch index plugin'
+ end
+
+ describe 'against a secured elasticsearch', :secure_integration => true do
+ let(:user) { 'simpleuser' }
+ let(:password) { 'abc123' }
+ let(:ca_file) { "spec/fixtures/test_certs/test.crt" }
+ let(:client_options) {{:ca_file => ca_file, :user => user, :password => password}}
+ let(:config) { super.merge({
+ 'user' => user,
+ 'password' => password,
+ 'ssl' => true,
+ 'ca_file' => ca_file })
+ }
+ it_behaves_like 'an elasticsearch index plugin'
+ end
 end
+
From 500857add90f2b0414426addaaead0e88a37be54 Mon Sep 17 00:00:00 2001
From: Karol Bucek
Date: Wed, 8 Jan 2020 15:44:53 +0100
Subject: [PATCH 106/207] Feat: support cloud_id / cloud_auth configuration (#112)

---
 CHANGELOG.md | 5 +-
 docs/index.asciidoc | 25 ++++++-
 lib/logstash/inputs/elasticsearch.rb | 82 ++++++++++++++++++++-
 logstash-input-elasticsearch.gemspec | 2 +-
 spec/inputs/elasticsearch_spec.rb | 103 +++++++++++++++++--------
 5 files changed, 181 insertions(+), 36 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3617f681..19dc273e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
+## 4.5.0
+ - Feat: Added support for cloud_id / cloud_auth configuration [#112](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/112)
+
 ## 4.4.0
- - Changed Elasticsearch Client transport to use Manticore[#111](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/111)
+ - Changed Elasticsearch Client transport to use Manticore [#111](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/111)

 ## 4.3.3
 - Loosen restrictions on Elasticsearch gem [#110](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/110)
diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 896bd247..8d7ec191 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -87,6 +87,8 @@ This plugin supports the following configuration options plus the <> |a valid filesystem path|No
+| <<plugins-{type}s-{plugin}-cloud_auth>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-cloud_id>> |<<string,string>>|No
| <> |<>|No
| <> |<>|No
| <> |<>|No
@@ -113,8 +115,27 @@ input plugins.
 * Value type is <>
 * There is no default value for this setting.
-SSL Certificate Authority file in PEM encoded format, must also
-include any chain certificates as necessary.
+SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary.
+
+[id="plugins-{type}s-{plugin}-cloud_auth"]
+===== `cloud_auth`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+Cloud authentication string ("<username>:<password>" format) is an alternative for the `user`/`password` pair.
+
+For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[Logstash-to-Cloud documentation]
+
+[id="plugins-{type}s-{plugin}-cloud_id"]
+===== `cloud_id`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used.
+
+For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation]

 [id="plugins-{type}s-{plugin}-docinfo"]
 ===== `docinfo`
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index bbdea5b9..e7991f0d 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -2,6 +2,7 @@
 require "logstash/inputs/base"
 require "logstash/namespace"
 require "logstash/json"
+require "logstash/util/safe_uri"
 require "base64"
@@ -69,6 +70,11 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 # Port defaults to 9200
 config :hosts, :validate => :array
+ # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used.
+ #
+ # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation]
+ config :cloud_id, :validate => :string
+
 # The index or alias to search.
 config :index, :validate => :string, :default => "logstash-*"
@@ -134,6 +140,11 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 # Basic Auth - password
 config :password, :validate => :password
+ # Cloud authentication string ("<username>:<password>" format) is an alternative for the `user`/`password` configuration.
+ #
+ # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[Logstash-to-Cloud documentation]
+ config :cloud_auth, :validate => :password
+
 # SSL
 config :ssl, :validate => :boolean, :default => false
@@ -165,12 +176,17 @@ def register
 transport_options = {}
+ fill_user_password_from_cloud_auth
+
 if @user && @password
 token = Base64.strict_encode64("#{@user}:#{@password.value}")
 transport_options[:headers] = { :Authorization => "Basic #{token}" }
 end
- hosts = if @ssl then
+ fill_hosts_from_cloud_id
+ @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s
+
+ hosts = if @ssl
 @hosts.map do |h|
 host, port = h.split(":")
 { :host => host, :scheme => 'https', :port => port }
@@ -275,10 +291,70 @@ def push_hit(hit, output_queue)
 end
 def scroll_request scroll_id
- @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
+ client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
 end
 def search_request(options)
- @client.search(options)
+ client.search(options)
+ end
+
+ attr_reader :client
+
+ def hosts_default?(hosts)
+ hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? )
+ end
+
+ def fill_hosts_from_cloud_id
+ return unless @cloud_id
+
+ if @hosts && !hosts_default?(@hosts)
+ raise LogStash::ConfigurationError, 'Both cloud_id and hosts specified, please only use one of those.'
+ end
+ @hosts = parse_host_uri_from_cloud_id(@cloud_id)
+ end
+
+ def fill_user_password_from_cloud_auth
+ return unless @cloud_auth
+
+ if @user || @password
+ raise LogStash::ConfigurationError, 'Both cloud_auth and user/password specified, please only use one.'
+ end
+ @user, @password = parse_user_password_from_cloud_auth(@cloud_auth)
+ params['user'], params['password'] = @user, @password
+ end
+
+ def parse_host_uri_from_cloud_id(cloud_id)
+ begin # might not be available on older LS
+ require 'logstash/util/cloud_setting_id'
+ rescue LoadError
+ raise LogStash::ConfigurationError, 'The cloud_id setting is not supported by your version of Logstash, ' +
+ 'please upgrade your installation (or set hosts instead).'
+ end + + begin + cloud_id = LogStash::Util::CloudSettingId.new(cloud_id) # already does append ':{port}' to host + rescue ArgumentError => e + raise LogStash::ConfigurationError, e.message.to_s.sub(/Cloud Id/i, 'cloud_id') + end + cloud_uri = "#{cloud_id.elasticsearch_scheme}://#{cloud_id.elasticsearch_host}" + LogStash::Util::SafeURI.new(cloud_uri) end + + def parse_user_password_from_cloud_auth(cloud_auth) + begin # might not be available on older LS + require 'logstash/util/cloud_setting_auth' + rescue LoadError + raise LogStash::ConfigurationError, 'The cloud_auth setting is not supported by your version of Logstash, ' + + 'please upgrade your installation (or set user/password instead).' + end + + cloud_auth = cloud_auth.value if cloud_auth.is_a?(LogStash::Util::Password) + begin + cloud_auth = LogStash::Util::CloudSettingAuth.new(cloud_auth) + rescue ArgumentError => e + raise LogStash::ConfigurationError, e.message.to_s.sub(/Cloud Auth/i, 'cloud_auth') + end + [ cloud_auth.username, cloud_auth.password ] + end + end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 312361c4..00b3e90b 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.4.0' + s.version = '4.5.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 2528f284..5e5fa6c9 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -504,6 +504,80 @@ def synchronize_method!(object, method_name) end end + describe "client" do + let(:config) do + { + + } + end + let(:plugin) { described_class.new(config) } + let(:event) { LogStash::Event.new({}) } + + describe "cloud.id" do + let(:valid_cloud_id) do + 'sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==' + end + + let(:config) { super.merge({ 'cloud_id' => valid_cloud_id }) } + + it "should set host(s)" do + plugin.register + client = plugin.send(:client) + expect( client.transport.hosts ).to eql [{ + :scheme => "https", + :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", + :port => 9243, + :path => "", + :protocol => "https" + }] + end + + context 'invalid' do + let(:config) { super.merge({ 'cloud_id' => 'invalid:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlv' }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_id.*? 
is invalid/
+ end
+ end
+
+ context 'hosts also set' do
+ let(:config) { super.merge({ 'cloud_id' => valid_cloud_id, 'hosts' => [ 'localhost:9200' ] }) }
+
+ it "should fail" do
+ expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_id and hosts/
+ end
+ end
+ end if LOGSTASH_VERSION > '6.0'
+
+ describe "cloud.auth" do
+ let(:config) { super.merge({ 'cloud_auth' => LogStash::Util::Password.new('elastic:my-passwd-00') }) }
+
+ it "should set authorization" do
+ plugin.register
+ client = plugin.send(:client)
+ auth_header = client.transport.options[:transport_options][:headers][:Authorization]
+
+ expect( auth_header ).to eql "Basic #{Base64.encode64('elastic:my-passwd-00').rstrip}"
+ end
+
+ context 'invalid' do
+ let(:config) { super.merge({ 'cloud_auth' => 'invalid-format' }) }
+
+ it "should fail" do
+ expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_auth.*? format/
+ end
+ end
+
+ context 'user also set' do
+ let(:config) { super.merge({ 'cloud_auth' => 'elastic:my-passwd-00', 'user' => 'another' }) }
+
+ it "should fail" do
+ expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_auth and user/
+ end
+ end
+ end if LOGSTASH_VERSION > '6.0'
+ end
+
 context "when scheduling" do
 let(:config) do
 {
@@ -513,39 +587,11 @@ def synchronize_method!(object, method_name)
 }
 end
- response = {
- "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g",
- "took" => 27,
- "timed_out" => false,
- "_shards" => {
- "total" => 169,
- "successful" => 169,
- "failed" => 0
- },
- "hits" => {
- "total" => 1,
- "max_score" => 1.0,
- "hits" => [ {
- "_index" => "logstash-2014.10.12",
- "_type" => "logs",
- "_id" => "C5b2xLQwTZa76jBmHIbwHQ",
- "_score" => 1.0,
- "_source" => { "message" => ["ohayo"] }
- } ]
- }
- }
-
- scroll_reponse = {
- "_scroll_id" => "r453Wc1jh0caLJhSDg",
- "hits" => { "hits" => [] }
- }
-
 before do
 plugin.register
 end
 it "should properly schedule" do
-
 Timecop.travel(Time.new(2000))
 Timecop.scale(60)
 runner = Thread.new do
@@ -564,5 +610,4 @@ def synchronize_method!(object, method_name)
 end
 end
-
 end
From a14b09a96e97ac0a212180a87c9b5cf9c33e6386 Mon Sep 17 00:00:00 2001
From: Karol Bucek
Date: Thu, 12 Mar 2020 15:12:53 +0100
Subject: [PATCH 107/207] Feat: added option to specify proxy for ES (#114)

---
 CHANGELOG.md | 3 +++
 docs/index.asciidoc | 11 +++++++++++
 lib/logstash/inputs/elasticsearch.rb | 23 +++++++++++++++++++++++
 logstash-input-elasticsearch.gemspec | 2 +-
 spec/inputs/elasticsearch_spec.rb | 23 +++++++++++++++++++++++
 5 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 19dc273e..5a5bb038 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.6.0
+ - Feat: added option to specify proxy for ES [#114](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/114)
+
 ## 4.5.0
 - Feat: Added support for cloud_id / cloud_auth configuration [#112](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/112)
diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 8d7ec191..de1cc78e 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -95,6 +95,7 @@ This plugin supports the following configuration options plus the <> |<>|No
| <> |<>|No
| <> |<>|No
+| <<plugins-{type}s-{plugin}-proxy>> |<<uri,uri>>|No
| <> |<>|No
| <> |<>|No
| <> |<>|No
@@ -231,6 +232,16 @@ The password to use together with the username in the `user` option
 when authenticating to the Elasticsearch server. If set to an empty
 string authentication will be disabled.
+[id="plugins-{type}s-{plugin}-proxy"]
+===== `proxy`
+
+* Value type is <<uri,uri>>
+* There is no default value for this setting.
+
+Set the address of a forward HTTP proxy.
+An empty string is treated as if proxy was not set; this is useful when using
+environment variables, e.g. `proxy => '${LS_PROXY:}'`.
+
 [id="plugins-{type}s-{plugin}-query"]
 ===== `query`
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index e7991f0d..5d248446 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -145,6 +145,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[Logstash-to-Cloud documentation]
 config :cloud_auth, :validate => :password
+ # Set the address of a forward HTTP proxy.
+ config :proxy, :validate => :uri_or_empty
+
 # SSL
 config :ssl, :validate => :boolean, :default => false
@@ -197,11 +200,31 @@ def register
 ssl_options = { :ssl => true, :ca_file => @ca_file } if @ssl && @ca_file
 ssl_options ||= {}
+ @logger.warn "Supplied proxy setting (proxy => '') has no effect" if @proxy.eql?('')
+
+ transport_options[:proxy] = @proxy.to_s if @proxy && !@proxy.eql?('')
+
 @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options,
 :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore,
 :ssl => ssl_options)
 end
+ ##
+ # @override to handle proxy => '' as if none was set
+ # @param value [Array]
+ # @param validator [nil,Array,Symbol]
+ # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value
+ # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason.
+ def self.validate_value(value, validator)
+ return super unless validator == :uri_or_empty
+
+ value = deep_replace(value)
+ value = hash_or_array(value)
+
+ return true, value.first if value.size == 1 && value.first.empty?
+
+ return super(value, :uri)
+ end
 def run(output_queue)
 if @schedule
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 00b3e90b..a71ffb57 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 s.name = 'logstash-input-elasticsearch'
- s.version = '4.5.0'
+ s.version = '4.6.0'
 s.licenses = ['Apache License (2.0)']
 s.summary = "Reads query results from an Elasticsearch cluster"
 s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname.
This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 5e5fa6c9..de25c87b 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -576,6 +576,29 @@ def synchronize_method!(object, method_name) end end end if LOGSTASH_VERSION > '6.0' + + describe "proxy" do + let(:config) { super.merge({ 'proxy' => 'http://localhost:1234/' }) } + + it "should set proxy" do + plugin.register + client = plugin.send(:client) + proxy = client.transport.options[:transport_options][:proxy] + + expect( proxy ).to eql "http://localhost:1234/" + end + + context 'invalid' do + let(:config) { super.merge({ 'proxy' => '${A_MISSING_ENV_VAR:}' }) } + + it "should not set proxy" do + plugin.register + client = plugin.send(:client) + + expect( client.transport.options[:transport_options] ).to_not include(:proxy) + end + end + end end context "when scheduling" do From a14b09a96e97ac0a212180a87c9b5cf9c33e6386 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Thu, 12 Mar 2020 15:12:53 +0100 Subject: [PATCH 108/207] Test: avoid insist usage + load shared example (#118) for devutils 2.0 compatibility --- spec/inputs/elasticsearch_spec.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index de25c87b..85c08897 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1,5 +1,6 @@ # encoding: utf-8 require "logstash/devutils/rspec/spec_helper" +require "logstash/devutils/rspec/shared_examples" require "logstash/inputs/elasticsearch" require "elasticsearch" require "timecop" @@ -80,8 +81,8 @@ queue.pop end - insist { event }.is_a?(LogStash::Event) - insist { event.get("message") } == [ "ohayo" ] + expect(event).to be_a(LogStash::Event) + expect(event.get("message")).to eql [ "ohayo" ] end From 93368ba4fecd6b632cdc281a868c8e26e65e5f1e Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Tue, 17 Mar 2020 15:41:55 +0000 Subject: [PATCH 109/207] [skip ci] updated apache license --- LICENSE | 209 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 199 insertions(+), 10 deletions(-) diff --git a/LICENSE b/LICENSE index 2162c9b7..a80a3fd5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,13 +1,202 @@ -Copyright (c) 2012-2018 Elasticsearch -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ - http://www.apache.org/licenses/LICENSE-2.0 + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Elastic and contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
From ddfa79c6ef23f424e8d8683d07037af9cd51ad9f Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Thu, 26 Mar 2020 17:37:37 +0000 Subject: [PATCH 110/207] update to centralized travis configuration --- {ci => .ci}/Dockerfile.elasticsearch | 22 ++++++++-------- .ci/docker-compose.override.yml | 24 ++++++++++++++++++ {ci => .ci}/docker-run.sh | 7 +++-- {ci => .ci}/docker-setup.sh | 12 +++++---- {ci => .ci}/elasticsearch-run.sh | 0 {ci => .ci}/logstash-run.sh | 0 .travis.yml | 31 +++++++++-------------- ci/Dockerfile.logstash | 22 ---------------- ci/build.sh | 21 --------------- ci/docker-compose.yml | 38 ---------------------------- ci/docker-lsonly-compose.yml | 20 --------------- ci/setup.sh | 26 ------------------- 12 files changed, 59 insertions(+), 164 deletions(-) rename {ci => .ci}/Dockerfile.elasticsearch (50%) create mode 100644 .ci/docker-compose.override.yml rename {ci => .ci}/docker-run.sh (51%) rename {ci => .ci}/docker-setup.sh (89%) rename {ci => .ci}/elasticsearch-run.sh (100%) rename {ci => .ci}/logstash-run.sh (100%) delete mode 100644 ci/Dockerfile.logstash delete mode 100755 ci/build.sh delete mode 100644 ci/docker-compose.yml delete mode 100644 ci/docker-lsonly-compose.yml delete mode 100755 ci/setup.sh diff --git a/ci/Dockerfile.elasticsearch b/.ci/Dockerfile.elasticsearch similarity index 50% rename from ci/Dockerfile.elasticsearch rename to .ci/Dockerfile.elasticsearch index 79c89ec2..afb65800 100644 --- a/ci/Dockerfile.elasticsearch +++ b/.ci/Dockerfile.elasticsearch @@ -1,19 +1,19 @@ -ARG elastic_stack_version -FROM docker.elastic.co/elasticsearch/elasticsearch$distribution_suffix:$elastic_stack_version +ARG ELASTIC_STACK_VERSION +FROM docker.elastic.co/elasticsearch/elasticsearch$distribution_suffix:$ELASTIC_STACK_VERSION -ARG plugin_path=/usr/share/plugins/this +ARG plugin_path=/usr/share/plugins/plugin ARG es_path=/usr/share/elasticsearch ARG es_yml=$es_path/config/elasticsearch.yml -ARG secure_integration +ARG SECURE_INTEGRATION RUN rm -f $es_path/config/scripts COPY --chown=elasticsearch:elasticsearch spec/fixtures/test_certs/* $es_path/config/test_certs/ -COPY --chown=elasticsearch:elasticsearch ci/elasticsearch-run.sh $es_path/ +COPY --chown=elasticsearch:elasticsearch .ci/elasticsearch-run.sh $es_path/ -RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.enabled: $secure_integration" >> $es_yml; fi -RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/test.key" >> $es_yml; fi -RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/test.crt" >> $es_yml; fi -RUN if [ "$secure_integration" = "true" ] ; then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi -RUN if [ "$secure_integration" = "true" ] ; then $es_path/bin/elasticsearch-users useradd simpleuser -p abc123 -r superuser; fi -RUN if [ "$secure_integration" = "true" ] ; then $es_path/bin/elasticsearch-users useradd 'f@ncyuser' -p 'ab%12#' -r superuser; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.enabled: $SECURE_INTEGRATION" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/test.key" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/test.crt" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; 
then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd simpleuser -p abc123 -r superuser; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd 'f@ncyuser' -p 'ab%12#' -r superuser; fi diff --git a/.ci/docker-compose.override.yml b/.ci/docker-compose.override.yml new file mode 100644 index 00000000..daabfdbf --- /dev/null +++ b/.ci/docker-compose.override.yml @@ -0,0 +1,24 @@ +version: '3' + +services: + + logstash: + command: /usr/share/plugins/plugin/.ci/logstash-run.sh + environment: + - INTEGRATION=${INTEGRATION:-false} + - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false} + + elasticsearch: + build: + context: ../ + dockerfile: .ci/Dockerfile.elasticsearch + args: + - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION + - INTEGRATION=${INTEGRATION:-false} + - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false} + command: /usr/share/elasticsearch/elasticsearch-run.sh + tty: true + ports: + - "9200:9200" + user: elasticsearch + diff --git a/ci/docker-run.sh b/.ci/docker-run.sh similarity index 51% rename from ci/docker-run.sh rename to .ci/docker-run.sh index 718f4972..02f0c42c 100755 --- a/ci/docker-run.sh +++ b/.ci/docker-run.sh @@ -2,8 +2,11 @@ # This is intended to be run inside the docker container as the command of the docker-compose. set -ex + +cd .ci + if [ "$INTEGRATION" == "true" ]; then - docker-compose -f ci/docker-compose.yml up --exit-code-from logstash + docker-compose up --exit-code-from logstash else - docker-compose -f ci/docker-lsonly-compose.yml up --exit-code-from logstash + docker-compose up --exit-code-from logstash logstash fi diff --git a/ci/docker-setup.sh b/.ci/docker-setup.sh similarity index 89% rename from ci/docker-setup.sh rename to .ci/docker-setup.sh index 8ff8a400..63f5add2 100755 --- a/ci/docker-setup.sh +++ b/.ci/docker-setup.sh @@ -1,6 +1,6 @@ #!/bin/bash -# This is intended to be run the plugin's root directory. `ci/docker-test.sh` +# This is intended to be run the plugin's root directory. `.ci/docker-setup.sh` # Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable. 
set -e @@ -61,12 +61,14 @@ if [ "$ELASTIC_STACK_VERSION" ]; then rm Gemfile.lock fi + cd .ci + if [ "$INTEGRATION" == "true" ]; then - docker-compose -f ci/docker-compose.yml down - docker-compose -f ci/docker-compose.yml build + docker-compose down + docker-compose build else - docker-compose -f ci/docker-lsonly-compose.yml down - docker-compose -f ci/docker-lsonly-compose.yml build + docker-compose down + docker-compose build logstash fi else echo "Please set the ELASTIC_STACK_VERSION environment variable" diff --git a/ci/elasticsearch-run.sh b/.ci/elasticsearch-run.sh similarity index 100% rename from ci/elasticsearch-run.sh rename to .ci/elasticsearch-run.sh diff --git a/ci/logstash-run.sh b/.ci/logstash-run.sh similarity index 100% rename from ci/logstash-run.sh rename to .ci/logstash-run.sh diff --git a/.travis.yml b/.travis.yml index 55b79bf5..4c3b3f8e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,20 +1,13 @@ ---- -sudo: required -services: docker -addons: - apt: - packages: - - docker-ce +import: +- logstash-plugins/.ci:travis/travis.yml@1.x + env: - - INTEGRATION=false ELASTIC_STACK_VERSION=5.x - - INTEGRATION=false ELASTIC_STACK_VERSION=6.x - - INTEGRATION=false ELASTIC_STACK_VERSION=7.x - - INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - - INTEGRATION=false ELASTIC_STACK_VERSION=8.x SNAPSHOT=true - - INTEGRATION=true ELASTIC_STACK_VERSION=6.x - - INTEGRATION=true ELASTIC_STACK_VERSION=7.x - - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true - - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x -install: ci/docker-setup.sh -script: ci/docker-run.sh \ No newline at end of file +- INTEGRATION=false ELASTIC_STACK_VERSION=6.x +- INTEGRATION=false ELASTIC_STACK_VERSION=7.x +- INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true +- INTEGRATION=false ELASTIC_STACK_VERSION=8.x SNAPSHOT=true +- INTEGRATION=true ELASTIC_STACK_VERSION=6.x +- INTEGRATION=true ELASTIC_STACK_VERSION=7.x +- INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true +- INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true +- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x diff --git a/ci/Dockerfile.logstash b/ci/Dockerfile.logstash deleted file mode 100644 index 4ff76b9e..00000000 --- a/ci/Dockerfile.logstash +++ /dev/null @@ -1,22 +0,0 @@ -ARG elastic_stack_version -FROM docker.elastic.co/logstash/logstash:$elastic_stack_version -USER root -RUN yum install -y openssl -USER logstash -ENV JARS_SKIP="true" -ENV LOGSTASH_SOURCE=1 -ENV LS_JAVA_OPTS="-Xmx256m -Xms256m" -ENV PATH="${PATH}:/usr/share/logstash/vendor/jruby/bin" -COPY --chown=logstash:logstash Gemfile /usr/share/plugins/this/Gemfile -COPY --chown=logstash:logstash *.gemspec /usr/share/plugins/this/ -RUN cp /usr/share/logstash/logstash-core/versions-gem-copy.yml /usr/share/logstash/versions.yml -RUN gem install bundler -v '< 2' -WORKDIR /usr/share/plugins/this -RUN bundle install --path=vendor/bundler -COPY --chown=logstash:logstash . /usr/share/plugins/this -ARG elastic_stack_version -ARG integration -ARG secure_integration -ENV INTEGRATION $integration -ENV SECURE_INTEGRATION $secure_integration -ENV ELASTIC_STACK_VERSION $elastic_stack_version diff --git a/ci/build.sh b/ci/build.sh deleted file mode 100755 index 06caffdc..00000000 --- a/ci/build.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# version: 1 -######################################################## -# -# AUTOMATICALLY GENERATED! 
DO NOT EDIT -# -######################################################## -set -e - -echo "Starting build process in: `pwd`" -source ./ci/setup.sh - -if [[ -f "ci/run.sh" ]]; then - echo "Running custom build script in: `pwd`/ci/run.sh" - source ./ci/run.sh -else - echo "Running default build scripts in: `pwd`/ci/build.sh" - bundle install - bundle exec rake vendor - bundle exec rspec spec -fi diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml deleted file mode 100644 index e3fc218d..00000000 --- a/ci/docker-compose.yml +++ /dev/null @@ -1,38 +0,0 @@ -version: '3' - -# run tests: cd ci; docker-compose up --build --force-recreate -# manual: cd ci; docker-compose run logstash bash -services: - - logstash: - build: - context: ../ - dockerfile: ci/Dockerfile.logstash - args: - - elastic_stack_version=$ELASTIC_STACK_VERSION - - integration=${INTEGRATION:-false} - - secure_integration=${SECURE_INTEGRATION:-false} - command: /usr/share/plugins/this/ci/logstash-run.sh - environment: - - SPEC_OPTS - tty: true - #volumes: - # - ./:/usr/share/plugins/this - - elasticsearch: - build: - context: ../ - dockerfile: ci/Dockerfile.elasticsearch - args: - - elastic_stack_version=$ELASTIC_STACK_VERSION - - integration=${INTEGRATION:-false} - - secure_integration=${SECURE_INTEGRATION:-false} - command: /usr/share/elasticsearch/elasticsearch-run.sh - tty: true - ports: - - "9200:9200" - user: elasticsearch - - #volumes: - # - ./:/usr/share/plugins/this - diff --git a/ci/docker-lsonly-compose.yml b/ci/docker-lsonly-compose.yml deleted file mode 100644 index fb38ffbe..00000000 --- a/ci/docker-lsonly-compose.yml +++ /dev/null @@ -1,20 +0,0 @@ -version: '3' - -# run tests: cd ci; docker-compose up --build --force-recreate -# manual: cd ci; docker-compose run logstash bash -services: - - logstash: - build: - context: ../ - dockerfile: ci/Dockerfile.logstash - args: - - elastic_stack_version=$ELASTIC_STACK_VERSION - - integration=${INTEGRATION:-false} - command: /usr/share/plugins/this/ci/logstash-run.sh - environment: - - SPEC_OPTS - tty: true - #volumes: - # - ./:/usr/share/plugins/this - diff --git a/ci/setup.sh b/ci/setup.sh deleted file mode 100755 index 835fa437..00000000 --- a/ci/setup.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# version: 1 -######################################################## -# -# AUTOMATICALLY GENERATED! DO NOT EDIT -# -######################################################## -set -e -if [ "$LOGSTASH_BRANCH" ]; then - echo "Building plugin using Logstash source" - BASE_DIR=`pwd` - echo "Checking out branch: $LOGSTASH_BRANCH" - git clone -b $LOGSTASH_BRANCH https://github.com/elastic/logstash.git ../../logstash --depth 1 - printf "Checked out Logstash revision: %s\n" "$(git -C ../../logstash rev-parse HEAD)" - cd ../../logstash - echo "Building plugins with Logstash version:" - cat versions.yml - echo "---" - # We need to build the jars for that specific version - echo "Running gradle assemble in: `pwd`" - ./gradlew assemble - cd $BASE_DIR - export LOGSTASH_SOURCE=1 -else - echo "Building plugin using released gems on rubygems" -fi From 9f356212ba3f906c707740944d47b7ee9313d2c5 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Tue, 28 Apr 2020 14:10:25 -0400 Subject: [PATCH 111/207] [DOC]Remove outdated compatibility notice (#124) The compatibility notice is outdated. Document type is not relevant for the input plugin. 
Fixes #123 --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 15 --------------- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a5bb038..7f5e0a99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.6.1 + - [DOC] Removed outdated compatibility notice [#124](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/124) + ## 4.6.0 - Feat: added option to specify proxy for ES [#114](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/114) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index de1cc78e..1037efe8 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -21,16 +21,6 @@ include::{include_path}/plugin_header.asciidoc[] ==== Description -.Compatibility Note -[NOTE] -================================================================================ -Starting with Elasticsearch 5.3, there's an {ref}/modules-http.html[HTTP setting] -called `http.content_type.required`. If this option is set to `true`, and you -are using Logstash 2.4 through 5.2, you need to update the Elasticsearch input -plugin to version 4.0.2 or higher. - -================================================================================ - Read from an Elasticsearch cluster, based on search query results. This is useful for replaying test logs, reindexing, etc. You can periodically schedule ingestion using a cron syntax @@ -174,11 +164,6 @@ Example } -NOTE: Starting with Logstash 6.0, the `document_type` option is -deprecated due to the -https://www.elastic.co/guide/en/elasticsearch/reference/6.0/removal-of-types.html[removal of types in Logstash 6.0]. -It will be removed in the next major version of Logstash. - [id="plugins-{type}s-{plugin}-docinfo_fields"] ===== `docinfo_fields` diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index a71ffb57..00e6b2bf 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.6.0' + s.version = '4.6.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From ff115f1f937727f40b0b0c32b058ab564538b6a8 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Thu, 21 May 2020 15:25:56 -0400 Subject: [PATCH 112/207] call client API clear_scroll after each search request and rescue scroll_request exceptions --- lib/logstash/inputs/elasticsearch.rb | 58 ++++++++++++++++++++-------- spec/inputs/elasticsearch_spec.rb | 14 ++++++- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 5d248446..f10dafbf 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -204,9 +204,12 @@ def register transport_options[:proxy] = @proxy.to_s if @proxy && !@proxy.eql?('') - @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options, - :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore, - :ssl => ssl_options) + @client = Elasticsearch::Client.new( + :hosts => hosts, + :transport_options => transport_options, + :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore, + :ssl => ssl_options + ) end ## @@ -266,25 +269,41 @@ def do_run_slice(output_queue, slice_id=nil) slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) ) logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil? - r = search_request(slice_options) - r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } - logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil? - - has_hits = r['hits']['hits'].any? + scroll_id = nil + begin + r = search_request(slice_options) - while has_hits && r['_scroll_id'] && !stop? - r = process_next_scroll(output_queue, r['_scroll_id']) + r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil? - has_hits = r['has_hits'] + + has_hits = r['hits']['hits'].any? + scroll_id = r['_scroll_id'] + + while has_hits && scroll_id && !stop? + has_hits, scroll_id = process_next_scroll(output_queue, scroll_id) + logger.debug("Slice progress", slice_id: slice_id, slices: @slices) if logger.debug? && slice_id + end + logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil? + ensure + clear_scroll(scroll_id) end - logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil? 
end

+  ##
+  # @param output_queue [#<<]
+  # @param scroll_id [String]: a scroll id to resume
+  # @return [Array(Boolean,String)]: a tuple representing whether the response
+  #   had any hits, and the scroll id with which the scroll can be resumed
   def process_next_scroll(output_queue, scroll_id)
     r = scroll_request(scroll_id)
     r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
-    {'has_hits' => r['hits']['hits'].any?, '_scroll_id' => r['_scroll_id']}
+    [r['hits']['hits'].any?, r['_scroll_id']]
+  rescue => e
+    # this will typically be triggered by a scroll timeout
+    logger.error("Scroll request error, aborting scroll", error: e.inspect)
+    # return no hits and original scroll_id so we can try to clear it
+    [false, scroll_id]
   end

   def push_hit(hit, output_queue)
@@ -313,16 +332,21 @@ def push_hit(hit, output_queue)
     output_queue << event
   end

+  def clear_scroll(scroll_id)
+    @client.clear_scroll(scroll_id: scroll_id) if scroll_id
+  rescue => e
+    # ignore & log any clear_scroll errors
+    logger.warn("Ignoring clear_scroll exception", message: e.message)
+  end
+
   def scroll_request scroll_id
-    client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
+    @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
   end

   def search_request(options)
-    client.search(options)
+    @client.search(options)
   end

-  attr_reader :client
-
   def hosts_default?(hosts)
     hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? )
   end
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 85c08897..4027767c 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -8,9 +8,13 @@
 require "time"
 require "date"

-describe LogStash::Inputs::Elasticsearch do
+class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch
+  attr_reader :client
+end
+
+describe LogStash::Inputs::TestableElasticsearch do

-  let(:plugin) { LogStash::Inputs::Elasticsearch.new(config) }
+  let(:plugin) { LogStash::Inputs::TestableElasticsearch.new(config) }
   let(:queue) { Queue.new }

   it_behaves_like "an interruptible input plugin" do
@@ -32,6 +36,7 @@
       }
       allow(esclient).to receive(:search) { { "hits" => { "hits" => [hit] } } }
       allow(esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } }
+      allow(esclient).to receive(:clear_scroll).and_return(nil)
     end
   end

@@ -76,6 +81,7 @@
     expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
     expect(client).to receive(:search).with(any_args).and_return(response)
     expect(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(scroll_reponse)
+    expect(client).to receive(:clear_scroll).and_return(nil)

     event = input(config) do |pipeline, queue|
       queue.pop
@@ -257,6 +263,8 @@ def synchronize_method!(object, method_name)
       expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
       plugin.register

+      expect(client).to receive(:clear_scroll).and_return(nil)
+
       # SLICE0 is a three-page scroll in which the last page is empty
       slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2}))
       expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0)
@@ -360,6 +368,7 @@ def synchronize_method!(object, method_name)
       expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
       expect(client).to receive(:search).with(any_args).and_return(response)
       allow(client).to receive(:scroll).with({ :body => {:scroll_id => "cXVlcnlUaGVuRmV0Y2g"}, :scroll => "1m" }).and_return(scroll_reponse)
+      allow(client).to
receive(:clear_scroll).and_return(nil) end context 'when defining docinfo' do @@ -405,6 +414,7 @@ def synchronize_method!(object, method_name) "docinfo_target" => 'metadata_with_string' } } it 'thows an exception if the `docinfo_target` exist but is not of type hash' do + expect(client).not_to receive(:clear_scroll) plugin.register expect { plugin.run([]) }.to raise_error(Exception, /incompatible event/) end From a4ca4ae0ea16ee568579f126cf5a4a17e59e1f80 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Fri, 29 May 2020 10:43:55 -0400 Subject: [PATCH 113/207] bump version to 4.6.2 --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f5e0a99..25abffa3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.6.2 + - Added scroll clearing and better handling of scroll expiration [#128](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/128) + ## 4.6.1 - [DOC] Removed outdated compatibility notice [#124](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/124) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 00e6b2bf..19445327 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.6.1' + s.version = '4.6.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 013eb29672d0e6af6ea24de7cf050b288656a411 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Fri, 29 May 2020 15:11:40 -0400 Subject: [PATCH 114/207] add support for api_key --- docs/index.asciidoc | 23 ++++++- lib/logstash/inputs/elasticsearch.rb | 97 +++++++++++++++++++--------- spec/inputs/elasticsearch_spec.rb | 32 ++++++++- 3 files changed, 119 insertions(+), 33 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 1037efe8..b54c6ce5 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -68,6 +68,16 @@ Further documentation describing this syntax can be found https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here]. +[id="plugins-{type}s-{plugin}-auth"] +==== Authentication + +Authentication to a secure Elasticsearch cluster is possible using _one_ of the following options: + +* <> AND <> +* <> +* <> + + [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input Configuration Options @@ -76,6 +86,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |a valid filesystem path|No | <> |<>|No | <> |<>|No @@ -100,6 +111,16 @@ input plugins.   +[id="plugins-{type}s-{plugin}-api_key"] +===== `api_key` + + * Value type is <> + * There is no default value for this setting. + +Authenticate using Elasticsearch API key. Note that this option also requires enabling the `ssl` option. + +Format is `id:api_key` where `id` and `api_key` are as returned by the Elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create API key API]. + [id="plugins-{type}s-{plugin}-ca_file"] ===== `ca_file` @@ -315,4 +336,4 @@ empty string authentication will be disabled. 
[id="plugins-{type}s-{plugin}-common-options"] include::{include_path}/{type}.asciidoc[] -:default_codec!: \ No newline at end of file +:default_codec!: diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index f10dafbf..a4cf8bda 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -70,11 +70,6 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # Port defaults to 9200 config :hosts, :validate => :array - # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. - # - # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation] - config :cloud_id, :validate => :string - # The index or alias to search. config :index, :validate => :string, :default => "logstash-*" @@ -140,11 +135,20 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # Basic Auth - password config :password, :validate => :password + # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. + # + # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation] + config :cloud_id, :validate => :string + # Cloud authentication string (":" format) is an alternative for the `user`/`password` configuration. # # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[Logstash-to-Cloud documentation] config :cloud_auth, :validate => :password + # Authenticate using Elasticsearch API key. + # format is id:api_key (as returned by https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create API key]) + config :api_key, :validate => :password + # Set the address of a forward HTTP proxy. config :proxy, :validate => :uri_or_empty @@ -177,28 +181,17 @@ def register @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`") end - transport_options = {} - + validate_authentication fill_user_password_from_cloud_auth + fill_hosts_from_cloud_id - if @user && @password - token = Base64.strict_encode64("#{@user}:#{@password.value}") - transport_options[:headers] = { :Authorization => "Basic #{token}" } - end - fill_hosts_from_cloud_id - @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s + transport_options = {:headers => {}} + transport_options[:headers].merge!(setup_basic_auth(user, password)) + transport_options[:headers].merge!(setup_api_key(api_key)) - hosts = if @ssl - @hosts.map do |h| - host, port = h.split(":") - { :host => host, :scheme => 'https', :port => port } - end - else - @hosts - end - ssl_options = { :ssl => true, :ca_file => @ca_file } if @ssl && @ca_file - ssl_options ||= {} + hosts = setup_hosts + ssl_options = setup_ssl @logger.warn "Supplied proxy setting (proxy => '') has no effect" if @proxy.eql?('') @@ -351,25 +344,67 @@ def hosts_default?(hosts) hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? 
) end - def fill_hosts_from_cloud_id - return unless @cloud_id + def validate_authentication + authn_options = 0 + authn_options += 1 if @cloud_auth + authn_options += 1 if (@api_key && @api_key.value) + authn_options += 1 if (@user || (@password && @password.value)) - if @hosts && !hosts_default?(@hosts) - raise LogStash::ConfigurationError, 'Both cloud_id and hosts specified, please only use one of those.' + if authn_options > 1 + raise LogStash::ConfigurationError, 'Multiple authentication options are specified, please only use one of user/password, cloud_auth or api_key' end - @hosts = parse_host_uri_from_cloud_id(@cloud_id) + + if @api_key && @api_key.value && @ssl != true + raise(LogStash::ConfigurationError, "Using api_key authentication requires SSL/TLS secured communication using the `ssl => true` option") + end + end + + def setup_ssl + @ssl && @ca_file ? { :ssl => true, :ca_file => @ca_file } : {} + end + + def setup_hosts + @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s + if @ssl + @hosts.map do |h| + host, port = h.split(":") + { :host => host, :scheme => 'https', :port => port } + end + else + @hosts + end + end + + def setup_basic_auth(user, password) + return {} unless user && password && password.value + + token = ::Base64.strict_encode64("#{user}:#{password.value}") + { Authorization: "Basic #{token}" } + end + + def setup_api_key(api_key) + return {} unless (api_key && api_key.value) + + token = ::Base64.strict_encode64(api_key.value) + { Authorization: "ApiKey #{token}" } end def fill_user_password_from_cloud_auth return unless @cloud_auth - if @user || @password - raise LogStash::ConfigurationError, 'Both cloud_auth and user/password specified, please only use one.' - end @user, @password = parse_user_password_from_cloud_auth(@cloud_auth) params['user'], params['password'] = @user, @password end + def fill_hosts_from_cloud_id + return unless @cloud_id + + if @hosts && !hosts_default?(@hosts) + raise LogStash::ConfigurationError, 'Both cloud_id and hosts specified, please only use one of those.' 
+ end + @hosts = parse_host_uri_from_cloud_id(@cloud_id) + end + def parse_host_uri_from_cloud_id(cloud_id) begin # might not be available on older LS require 'logstash/util/cloud_setting_id' diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 4027767c..4832f5e9 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -583,7 +583,37 @@ def synchronize_method!(object, method_name) let(:config) { super.merge({ 'cloud_auth' => 'elastic:my-passwd-00', 'user' => 'another' }) } it "should fail" do - expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_auth and user/ + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ + end + end + end if LOGSTASH_VERSION > '6.0' + + describe "api_key" do + context "without ssl" do + let(:config) { super.merge({ 'api_key' => LogStash::Util::Password.new('foo:bar') }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /api_key authentication requires SSL\/TLS/ + end + end + + context "with ssl" do + let(:config) { super.merge({ 'api_key' => LogStash::Util::Password.new('foo:bar'), "ssl" => true }) } + + it "should set authorization" do + plugin.register + client = plugin.send(:client) + auth_header = client.transport.options[:transport_options][:headers][:Authorization] + + expect( auth_header ).to eql "ApiKey #{Base64.strict_encode64('foo:bar')}" + end + + context 'user also set' do + let(:config) { super.merge({ 'api_key' => 'foo:bar', 'user' => 'another' }) } + + it "should fail" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ + end end end end if LOGSTASH_VERSION > '6.0' From 17e10bee06f154c5d61e27d922f149806ea0152d Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Fri, 29 May 2020 15:22:13 -0400 Subject: [PATCH 115/207] bump to version 4.7.0 --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25abffa3..fb1e0b70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.7.0 + - Added api_key support [#131](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/131) + ## 4.6.2 - Added scroll clearing and better handling of scroll expiration [#128](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/128) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 19445327..7511ac72 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.6.2' + s.version = '4.7.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From decc77a40824befe030c3f902f31d265f7e72405 Mon Sep 17 00:00:00 2001 From: Rob Bavey Date: Fri, 12 Jun 2020 15:15:17 -0400 Subject: [PATCH 116/207] Pass `ELASTIC_STACK_VERSION` env variable to docker image (#132) ELASTIC_STACK_VERSION was not being passed down to the docker image causing the ESHelper.version_satisfies? 
helper method to report the wrong version, causing tests to fail due to doc types being incorrectly set.
---
 .ci/docker-compose.override.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/docker-compose.override.yml b/.ci/docker-compose.override.yml
index daabfdbf..7b364c3c 100644
--- a/.ci/docker-compose.override.yml
+++ b/.ci/docker-compose.override.yml
@@ -7,7 +7,7 @@ services:
     environment:
       - INTEGRATION=${INTEGRATION:-false}
       - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false}
-
+      - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION
   elasticsearch:
     build:
       context: ../

From c543588afe517f29f0468cade94d236a203a55f3 Mon Sep 17 00:00:00 2001
From: Luca Belluccini
Date: Wed, 5 Aug 2020 22:13:59 +0200
Subject: [PATCH 117/207] [DOC]Add usage example of add_field with docinfo
 metadata (#98)

---
 docs/index.asciidoc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index b54c6ce5..03a4a781 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -184,6 +184,19 @@ Example
     }
 }

+If set, it will be possible to use the metadata information in the <> common option.
+
+Example
+[source, ruby]
+    input {
+      elasticsearch {
+        docinfo => true
+        add_field => {
+          identifier => "%{[@metadata][_index]}:%{[@metadata][_type]}:%{[@metadata][_id]}"
+        }
+      }
+    }
+
 [id="plugins-{type}s-{plugin}-docinfo_fields"]
 ===== `docinfo_fields`

From bf3fb52eeac14659560136e96726c57080f26ddc Mon Sep 17 00:00:00 2001
From: Karen Metts <35154725+karenzone@users.noreply.github.com>
Date: Thu, 6 Aug 2020 08:48:13 -0400
Subject: [PATCH 118/207] Doc:Update sliced scroll link to resolve to new location (#135)

Fix link that was broken by elasticsearch doc improvements and restructuring
Bump version to 4.7.1
---
 CHANGELOG.md | 4 ++++
 docs/index.asciidoc | 6 +++---
 logstash-input-elasticsearch.gemspec | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb1e0b70..5a0cfbbe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 4.7.1
+  - [DOC] Updated sliced scroll link to resolve to correct location after doc structure change [#135](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/135)
+  - [DOC] Added usage example of docinfo metadata [#98](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/98)
+
 ## 4.7.0
   - Added api_key support [#131](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/131)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 03a4a781..9ec59cce 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -184,7 +184,7 @@ Example
     }
 }

-If set, it will be possible to use the metadata information in the <> common option.
+If set, you can use metadata information in the <> common option.

 Example
 [source, ruby]
@@ -309,8 +309,8 @@
 This allows you to set the maximum number of hits returned per scroll.

 * Sensible values range from 2 to about 8.

 In some cases, it is possible to improve overall throughput by consuming multiple
-distinct slices of a query simultaneously using the
-https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html#sliced-scroll[Sliced Scroll API],
+distinct slices of a query simultaneously using
+https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#slice-scroll[sliced scrolls],
 especially if the pipeline is spending significant time waiting on
 Elasticsearch to provide results.
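For illustration, a pipeline consuming several slices in parallel might be configured as sketched below; the host, query, and `slices` value are placeholders rather than recommendations:

[source, ruby]
    input {
      elasticsearch {
        hosts => ["localhost"]
        query => '{ "query": { "match_all": {} } }'
        slices => 4
      }
    }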
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 7511ac72..d935f046 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.7.0' + s.version = '4.7.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 89c7353c625e02599ce755bd3362e5bf4a0088a5 Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Mon, 17 Aug 2020 07:14:42 -0700 Subject: [PATCH 119/207] make connection, request, and socket timeouts configurable (#134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: extract :uri_or_empty validator to extension module * make connection, request, and socket timeouts configurable default values were chosen from the current default behaviour of these transport options as provided by the Manticore transport adapter we provide to the Elasticsearch Client: > ~~~ > request_timeout (integer) — default: 60 — Sets the timeout for requests. > connect_timeout (integer) — default: 10 — Sets the timeout for connections. > socket_timeout (integer) — default: 10 — Sets `SO_TIMEOUT` for open connections. A value of 0 is an infinite timeout. > ~~~ > > -- [Manticore::Client#initialize](https://www.rubydoc.info/gems/manticore/Manticore/Client#initialize-instance_method) I have elected _not_ to allow an infinite `socket_timeout` by requiring the provided value to be positive, as I can see no valid use-case of such using Elasticsearch's HTTP API. * timeouts: increase default socket timeout to 60s Mirrors behaviour of prior `Net::HTTP` adapter used in version 4.3 of this plugin, which had a read timeout of 60s. 
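As a sketch, a pipeline opting into these settings might look like the
following, where the host is illustrative and the timeout values shown
are simply the defaults:

~~~
input {
  elasticsearch {
    hosts => ["localhost:9200"]
    connect_timeout_seconds => 10
    request_timeout_seconds => 60
    socket_timeout_seconds => 60
  }
}
~~~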
* bump version to 4.8.0 --- CHANGELOG.md | 3 ++ docs/index.asciidoc | 31 +++++++++++++ lib/logstash/inputs/elasticsearch.rb | 65 +++++++++++++++++++++------- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 48 ++++++++++++++++++++ 5 files changed, 133 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a0cfbbe..7897643e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.8.0 + - Added the ability to configure connection-, request-, and socket-timeouts with `connect_timeout_seconds`, `request_timeout_seconds`, and `socket_timeout_seconds` [#121](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/121) + ## 4.7.1 - [DOC] Updated sliced scroll link to resolve to correct location after doc structure change [#135](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/135) - [DOC] Added usage example of docinfo metadata [#98](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/98) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 9ec59cce..ec6eb8a5 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -90,6 +90,7 @@ This plugin supports the following configuration options plus the <> |a valid filesystem path|No | <> |<>|No | <> |<>|No +| <> | <>|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -98,11 +99,13 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No +| <> | <>|No | <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No +| <> | <>|No | <> |<>|No |======================================================================= @@ -149,6 +152,15 @@ Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation] +[id="plugins-{type}s-{plugin}-connect_timeout_seconds"] +===== `connect_timeout_seconds` + + * Value type is <> + * Default value is `10` + +The maximum amount of time, in seconds, to wait while establishing a connection to Elasticsearch. +Connect timeouts tend to occur when Elasticsearch or an intermediate proxy is overloaded with requests and has exhausted its connection pool. + [id="plugins-{type}s-{plugin}-docinfo"] ===== `docinfo` @@ -271,6 +283,16 @@ The query to be executed. Read the https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html[Elasticsearch query DSL documentation] for more information. +[id="plugins-{type}s-{plugin}-request_timeout_seconds"] +===== `request_timeout_seconds` + + * Value type is <> + * Default value is `60` + +The maximum amount of time, in seconds, for a single request to Elasticsearch. +Request timeouts tend to occur when an individual page of data is very large, such as when it contains large-payload +documents and/or the <> has been specified as a large value. + [id="plugins-{type}s-{plugin}-schedule"] ===== `schedule` @@ -334,6 +356,15 @@ instructions into the query. If enabled, SSL will be used when communicating with the Elasticsearch server (i.e. HTTPS will be used instead of plain HTTP). +[id="plugins-{type}s-{plugin}-socket_timeout_seconds"] +===== `socket_timeout_seconds` + + * Value type is <> + * Default value is `60` + +The maximum amount of time, in seconds, to wait on an incomplete response from Elasticsearch while no additional data has been appended. 
+Socket timeouts usually occur while waiting for the first byte of a response, such as when executing a particularly complex query. + [id="plugins-{type}s-{plugin}-user"] ===== `user` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index a4cf8bda..0c8c4b09 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -135,6 +135,15 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # Basic Auth - password config :password, :validate => :password + # Connection Timeout, in Seconds + config :connect_timeout_seconds, :validate => :positive_whole_number, :default => 10 + + # Request Timeout, in Seconds + config :request_timeout_seconds, :validate => :positive_whole_number, :default => 60 + + # Socket Timeout, in Seconds + config :socket_timeout_seconds, :validate => :positive_whole_number, :default => 60 + # Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. # # For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation] @@ -189,6 +198,9 @@ def register transport_options = {:headers => {}} transport_options[:headers].merge!(setup_basic_auth(user, password)) transport_options[:headers].merge!(setup_api_key(api_key)) + transport_options[:request_timeout] = @request_timeout_seconds unless @request_timeout_seconds.nil? + transport_options[:connect_timeout] = @connect_timeout_seconds unless @connect_timeout_seconds.nil? + transport_options[:socket_timeout] = @socket_timeout_seconds unless @socket_timeout_seconds.nil? hosts = setup_hosts ssl_options = setup_ssl @@ -205,22 +217,7 @@ def register ) end - ## - # @override to handle proxy => '' as if none was set - # @param value [Array] - # @param validator [nil,Array,Symbol] - # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value - # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason. - def self.validate_value(value, validator) - return super unless validator == :uri_or_empty - - value = deep_replace(value) - value = hash_or_array(value) - return true, value.first if value.size == 1 && value.first.empty? - - return super(value, :uri) - end def run(output_queue) if @schedule @@ -439,4 +436,42 @@ def parse_user_password_from_cloud_auth(cloud_auth) [ cloud_auth.username, cloud_auth.password ] end + module URIOrEmptyValidator + ## + # @override to provide :uri_or_empty validator + # @param value [Array] + # @param validator [nil,Array,Symbol] + # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value + # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason. + def validate_value(value, validator) + return super unless validator == :uri_or_empty + + value = deep_replace(value) + value = hash_or_array(value) + + return true, value.first if value.size == 1 && value.first.empty? + + return super(value, :uri) + end + end + extend(URIOrEmptyValidator) + + module PositiveWholeNumberValidator + ## + # @override to provide :positive_whole_number validator + # @param value [Array] + # @param validator [nil,Array,Symbol] + # @return [Array(true,Object)]: if validation is a success, a tuple containing `true` and the coerced value + # @return [Array(false,String)]: if validation is a failure, a tuple containing `false` and the failure reason. 
+ def validate_value(value, validator) + return super unless validator == :positive_whole_number + + is_number, coerced_number = super(value, :number) + + return [true, coerced_number.to_i] if is_number && coerced_number.denominator == 1 && coerced_number > 0 + + return [false, "Expected positive whole number, got `#{value.inspect}`"] + end + end + extend(PositiveWholeNumberValidator) end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d935f046..4ba81233 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.7.1' + s.version = '4.8.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 4832f5e9..0ecc3ac6 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -640,6 +640,54 @@ def synchronize_method!(object, method_name) end end end + + shared_examples'configurable timeout' do |config_name, manticore_transport_option| + let(:config_value) { fail NotImplementedError } + let(:config) { super().merge(config_name => config_value) } + { + :string => 'banana', + :negative => -123, + :zero => 0, + }.each do |value_desc, value| + let(:config_value) { value } + context "with an invalid #{value_desc} value" do + it 'prevents instantiation with a helpful message' do + expect(described_class.logger).to receive(:error).with(/Expected positive whole number/) + expect { described_class.new(config) }.to raise_error(LogStash::ConfigurationError) + end + end + end + + context 'with a valid value' do + let(:config_value) { 17 } + + it "instantiates the elasticsearch client with the timeout value set via #{manticore_transport_option} in the transport options" do + expect(Elasticsearch::Client).to receive(:new) do |new_elasticsearch_client_params| + # We rely on Manticore-specific transport options, fail early if we are using a different + # transport or are allowing the client to determine its own transport class. 
+          expect(new_elasticsearch_client_params).to include(:transport_class)
+          expect(new_elasticsearch_client_params[:transport_class].name).to match(/\bManticore\b/)
+
+          expect(new_elasticsearch_client_params).to include(:transport_options)
+          transport_options = new_elasticsearch_client_params[:transport_options]
+          expect(transport_options).to include(manticore_transport_option)
+          expect(transport_options[manticore_transport_option]).to eq(config_value.to_i)
+        end
+
+        plugin.register
+      end
+    end
+  end
+
+  context 'connect_timeout_seconds' do
+    include_examples('configurable timeout', 'connect_timeout_seconds', :connect_timeout)
+  end
+  context 'request_timeout_seconds' do
+    include_examples('configurable timeout', 'request_timeout_seconds', :request_timeout)
+  end
+  context 'socket_timeout_seconds' do
+    include_examples('configurable timeout', 'socket_timeout_seconds', :socket_timeout)
+  end
 end

 context "when scheduling" do

From da39201f12a95e3c76e10938239d0e267ca5a388 Mon Sep 17 00:00:00 2001
From: andsel
Date: Thu, 20 Aug 2020 12:41:08 +0200
Subject: [PATCH 120/207] Fix concurrency bug that prevented retrieving a
 connection when using multiple slices

Patched the RoundRobin policy used by the underlying elasticsearch-transport
gem. In case of multiple slices there is one thread per slice, and the library
exposed a race condition by not protecting access to the connections pool.

Fixes: #133
---
 CHANGELOG.md | 3 ++
 lib/logstash/inputs/elasticsearch.rb | 1 +
 lib/logstash/inputs/patch.rb | 48 ++++++++++++++++++++++++++++
 logstash-input-elasticsearch.gemspec | 2 +-
 4 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 lib/logstash/inputs/patch.rb

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7897643e..66185a18 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.8.1
+  - Fixed connection error when using multiple `slices`. [#133](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/133)
+
 ## 4.8.0
  - Added the ability to configure connection-, request-, and socket-timeouts with `connect_timeout_seconds`, `request_timeout_seconds`, and `socket_timeout_seconds` [#121](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/121)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index 0c8c4b09..eee2f4f5 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -4,6 +4,7 @@
 require "logstash/json"
 require "logstash/util/safe_uri"
 require "base64"
+require_relative "patch"

 # .Compatibility Note

diff --git a/lib/logstash/inputs/patch.rb b/lib/logstash/inputs/patch.rb
new file mode 100644
index 00000000..098e68a8
--- /dev/null
+++ b/lib/logstash/inputs/patch.rb
@@ -0,0 +1,48 @@
+if Gem.loaded_specs['elasticsearch-transport'].version >= Gem::Version.new("7.2.0")
+  # elasticsearch-transport versions prior to 7.2.0 suffered from a race condition on accessing
+  # the connection pool. This issue was fixed with https://github.com/elastic/elasticsearch-ruby/commit/15f9d78591a6e8823948494d94b15b0ca38819d1
+  # This plugin, at the moment, is forced to use v5.x so we have to monkey patch the gem. When this requirement
+  # ceases, this patch could be removed.
+  puts "WARN: the RoundRobin patch in logstash-input-elasticsearch is no longer needed and can be removed"
+else
+  module Elasticsearch
+    module Transport
+      module Transport
+        module Connections
+          module Selector
+
+            # "Round-robin" selector strategy (default).
+ # + class RoundRobin + include Base + + # @option arguments [Connections::Collection] :connections Collection with connections. + # + def initialize(arguments = {}) + super + @mutex = Mutex.new + @current = nil + end + + # Returns the next connection from the collection, rotating them in round-robin fashion. + # + # @return [Connections::Connection] + # + def select(options={}) + @mutex.synchronize do + conns = connections + if @current && (@current < conns.size-1) + @current += 1 + else + @current = 0 + end + conns[@current] + end + end + end + end + end + end + end + end +end \ No newline at end of file diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 4ba81233..f38043fa 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.8.0' + s.version = '4.8.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From e3d29f29b9d1516ecc30e0ade1420b2322bd9881 Mon Sep 17 00:00:00 2001 From: Luca Belluccini Date: Thu, 15 Oct 2020 23:08:54 +0200 Subject: [PATCH 121/207] [DOC] Explicitly call out permissions (#137) Bump to v.4.8.2 --- CHANGELOG.md | 3 +++ CONTRIBUTORS | 1 + docs/index.asciidoc | 5 +++++ logstash-input-elasticsearch.gemspec | 2 +- 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66185a18..2f4d9e04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.8.2 + - [DOC] Document the permissions required in secured clusters [#137](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/137) + ## 4.8.1 - Fixed connection error when using multiple `slices`. [#133](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/133) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index f91ccc76..673b50ad 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -7,6 +7,7 @@ Contributors: * Jordan Sissel (jordansissel) * João Duarte (jsvd) * Kurt Hurtado (kurtado) +* Luca Belluccini (lucabelluccini) * Pier-Hugues Pellerin (ph) * Richard Pijnenburg (electrical) * Suyog Rao (suyograo) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index ec6eb8a5..38c23f33 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -77,6 +77,11 @@ Authentication to a secure Elasticsearch cluster is possible using _one_ of the * <> * <> +[id="plugins-{type}s-{plugin}-autz"] +==== Authorization + +Authorization to a secure Elasticsearch cluster requires `read` permission at index level and `monitoring` permissions at cluster level. +The `monitoring` permission at cluster level is necessary to perform periodic connectivity checks. 
[id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input Configuration Options diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index f38043fa..e76d1dd8 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.8.1' + s.version = '4.8.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From fc9fc8a4731d47fef77f040bfa51f0d2ea296f64 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Thu, 5 Nov 2020 11:28:59 -0500 Subject: [PATCH 122/207] Doc: Update links to logstash-to-cloud docs (#139) Cloud rework broke some links from the plugins. This PR updates the links and points one level up in docs to give user a bit more context for logstash->cloud. Bump to v.4.8.3 Fixes: #138 --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 13 ++++++++++--- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f4d9e04..bc933611 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.8.3 + - [DOC] Fixed links to restructured Logstash-to-cloud docs [#139](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/139) + ## 4.8.2 - [DOC] Document the permissions required in secured clusters [#137](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/137) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 38c23f33..f37ef85e 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -127,7 +127,10 @@ input plugins. Authenticate using Elasticsearch API key. Note that this option also requires enabling the `ssl` option. -Format is `id:api_key` where `id` and `api_key` are as returned by the Elasticsearch https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create API key API]. +Format is `id:api_key` where `id` and `api_key` are as returned by the +Elasticsearch +https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create +API key API]. [id="plugins-{type}s-{plugin}-ca_file"] ===== `ca_file` @@ -145,7 +148,9 @@ SSL Certificate Authority file in PEM encoded format, must also include any chai Cloud authentication string (":" format) is an alternative for the `user`/`password` pair. -For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_auth[Logstash-to-Cloud documentation] +For more info, check out the +https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html[Logstash-to-Cloud +documentation] [id="plugins-{type}s-{plugin}-cloud_id"] ===== `cloud_id` @@ -155,7 +160,9 @@ For more info, check out the https://www.elastic.co/guide/en/logstash/current/co Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. 
-For more info, check out the https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html#_cloud_id[Logstash-to-Cloud documentation] +For more info, check out the +https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html[Logstash-to-Cloud +documentation] [id="plugins-{type}s-{plugin}-connect_timeout_seconds"] ===== `connect_timeout_seconds` diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index e76d1dd8..97dc3d95 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.8.2' + s.version = '4.8.3' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From dfe4fa647f5afe25fb5997dd8686c657c29f0e1c Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Tue, 15 Dec 2020 14:10:54 -0800 Subject: [PATCH 123/207] Introduce `target` option (#140) * introduce `target` option * ci: cross-port fixes to docker image arch lookup * config: use `field_reference` validator from validator_support --- .ci/docker-setup.sh | 65 +++++++++------ CHANGELOG.md | 3 + docs/index.asciidoc | 14 ++++ lib/logstash/inputs/elasticsearch.rb | 13 ++- logstash-input-elasticsearch.gemspec | 1 + spec/inputs/elasticsearch_spec.rb | 118 +++++++++++++++++---------- 6 files changed, 144 insertions(+), 70 deletions(-) diff --git a/.ci/docker-setup.sh b/.ci/docker-setup.sh index 63f5add2..7145a5aa 100755 --- a/.ci/docker-setup.sh +++ b/.ci/docker-setup.sh @@ -4,6 +4,42 @@ # Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable. 
set -e + +download_and_load_docker_snapshot_artifact() { + project="${1?project name required}" + + artifact_type="docker-image" + artifact_name_base="${project}${DISTRIBUTION_SUFFIX}-${ELASTIC_STACK_VERSION}-${artifact_type}" + echo "Downloading snapshot docker image: ${project}${DISTRIBUTION_SUFFIX} (${ELASTIC_STACK_VERSION})" + + artifact_name_noarch="${artifact_name_base}.tar.gz" + artifact_name_arch="${artifact_name_base}-x86_64.tar.gz" + + jq_extract_artifact_url=".build.projects.\"${project}\".packages | (.\"${artifact_name_noarch}\" // .\"${artifact_name_arch}\") | .url" + + artifact_list=$(curl --silent "https://artifacts-api.elastic.co/v1/versions/$%7BELASTIC_STACK_VERSION%7D/builds/latest") + artifact_url=$(echo "${artifact_list}" | jq --raw-output "${jq_extract_artifact_url}") + + if [[ "${artifact_url}" == "null" ]]; then + echo "Failed to find '${artifact_name_noarch}'" + echo "Failed to find '${artifact_name_arch}'" + echo "Listing:" + echo "${artifact_list}" | jq --raw-output ".build.projects.\"${project}\".packages | keys | map(select(contains(\"${artifact_type}\")))" + return 1 + fi + + echo "${artifact_url}" + + cd /tmp + curl "${artifact_url}" > "${project}-docker-image.tar.gz" + tar xfvz "${project}-docker-image.tar.gz" repositories + echo "Loading ${project} docker image: " + cat repositories + docker load < "${project}-docker-image.tar.gz" + rm "${project}-docker-image.tar.gz" + cd - +} + VERSION_URL="https://raw.githubusercontent.com/elastic/logstash/master/ci/logstash_releases.json" if [ "$ELASTIC_STACK_VERSION" ]; then @@ -26,34 +62,9 @@ if [ "$ELASTIC_STACK_VERSION" ]; then echo "Testing against version: $ELASTIC_STACK_VERSION" if [[ "$ELASTIC_STACK_VERSION" = *"-SNAPSHOT" ]]; then - cd /tmp - - jq=".build.projects.\"logstash\".packages.\"logstash-$ELASTIC_STACK_VERSION-docker-image.tar.gz\".url" - result=$(curl --silent https://artifacts-api.elastic.co/v1/versions/$ELASTIC_STACK_VERSION/builds/latest | jq -r $jq) - echo $result - curl $result > logstash-docker-image.tar.gz - tar xfvz logstash-docker-image.tar.gz repositories - echo "Loading docker image: " - cat repositories - docker load < logstash-docker-image.tar.gz - rm logstash-docker-image.tar.gz - cd - - + download_and_load_docker_snapshot_artifact "logstash" if [ "$INTEGRATION" == "true" ]; then - - cd /tmp - - jq=".build.projects.\"elasticsearch\".packages.\"elasticsearch-$ELASTIC_STACK_VERSION-docker-image.tar.gz\".url" - result=$(curl --silent https://artifacts-api.elastic.co/v1/versions/$ELASTIC_STACK_VERSION/builds/latest | jq -r $jq) - echo $result - curl $result > elasticsearch-docker-image.tar.gz - tar xfvz elasticsearch-docker-image.tar.gz repositories - echo "Loading docker image: " - cat repositories - docker load < elasticsearch-docker-image.tar.gz - rm elasticsearch-docker-image.tar.gz - cd - - + download_and_load_docker_snapshot_artifact "elasticsearch" fi fi diff --git a/CHANGELOG.md b/CHANGELOG.md index bc933611..b21ef931 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.9.0 + - Added `target` option, allowing the hit's source to target a specific field instead of being expanded at the root of the event. This allows the input to play nicer with the Elastic Common Schema when the input does not follow the schema. 
[#117](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/117) + ## 4.8.3 - [DOC] Fixed links to restructured Logstash-to-cloud docs [#139](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/139) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index f37ef85e..792a7fdc 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -111,6 +111,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> | <>|No +| <> | https://www.elastic.co/guide/en/logstash/master/field-references-deepdive.html[field reference] | No | <> |<>|No |======================================================================= @@ -377,6 +378,19 @@ server (i.e. HTTPS will be used instead of plain HTTP). The maximum amount of time, in seconds, to wait on an incomplete response from Elasticsearch while no additional data has been appended. Socket timeouts usually occur while waiting for the first byte of a response, such as when executing a particularly complex query. + +[id="plugins-{type}s-{plugin}-target"] +===== `target` + +* Value type is https://www.elastic.co/guide/en/logstash/master/field-references-deepdive.html[field reference] +* There is no default value for this setting. + +Without a `target`, events are created from each hit's `_source` at the root level. +When the `target` is set to a field reference, the `_source` of the hit is placed in the target field instead. + +This option can be useful to avoid populating unknown fields when a downstream schema such as ECS is enforced. +It is also possible to target an entry in the event's metadata, which will be available during event processing but not exported to your outputs (e.g., `target \=> "[@metadata][_source]"`). + [id="plugins-{type}s-{plugin}-user"] ===== `user` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index eee2f4f5..15e7567f 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -3,6 +3,7 @@ require "logstash/namespace" require "logstash/json" require "logstash/util/safe_uri" +require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter' require "base64" require_relative "patch" @@ -62,6 +63,8 @@ # # class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base + extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter + config_name "elasticsearch" default :codec, "json" @@ -175,6 +178,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # exactly once. config :schedule, :validate => :string + # If set, the _source of each hit will be added nested under the target instead of at the top-level + config :target, :validate => :field_reference + def register require "elasticsearch" require "rufus/scheduler" @@ -298,7 +304,12 @@ def process_next_scroll(output_queue, scroll_id) end def push_hit(hit, output_queue) - event = LogStash::Event.new(hit['_source']) + if @target.nil? + event = LogStash::Event.new(hit['_source']) + else + event = LogStash::Event.new + event.set(@target, hit['_source']) + end if @docinfo # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. 
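For illustration, a minimal pipeline exercising the new `target` option might look
like this (the host and the target field name below are placeholders, not part of
the patch):

[source,ruby]
    input {
      elasticsearch {
        hosts  => ["localhost"]
        query  => '{ "query": { "match_all": {} } }'
        target => "[document]"
      }
    }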
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 97dc3d95..6585eeee 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -20,6 +20,7 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies + s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" s.add_runtime_dependency 'elasticsearch', '>= 5.0.3' diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 0ecc3ac6..f90934ad 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -40,57 +40,91 @@ class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch end end - it "should retrieve json event from elasticseach" do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + context 'creating events from Elasticsearch' do + let(:config) do + %q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } } + ] + end + + let(:mock_response) do + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } ] + } } - ] - - response = { - "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", - "took" => 27, - "timed_out" => false, - "_shards" => { - "total" => 169, - "successful" => 169, - "failed" => 0 - }, - "hits" => { - "total" => 1, - "max_score" => 1.0, - "hits" => [ { - "_index" => "logstash-2014.10.12", - "_type" => "logs", - "_id" => "C5b2xLQwTZa76jBmHIbwHQ", - "_score" => 1.0, - "_source" => { "message" => ["ohayo"] } - } ] + end + + let(:mock_scroll_response) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } } - } + end - scroll_reponse = { - "_scroll_id" => "r453Wc1jh0caLJhSDg", - "hits" => { "hits" => [] } - } + before(:each) do + client = Elasticsearch::Client.new + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:search).with(any_args).and_return(mock_response) + expect(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response) + expect(client).to receive(:clear_scroll).and_return(nil) + end - client = Elasticsearch::Client.new - expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) - expect(client).to receive(:search).with(any_args).and_return(response) - expect(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(scroll_reponse) - expect(client).to receive(:clear_scroll).and_return(nil) + it 'creates the events from the hits' do + event = input(config) do |pipeline, queue| + queue.pop + end - event = input(config) do |pipeline, queue| - queue.pop + expect(event).to be_a(LogStash::Event) + puts event.to_hash_with_metadata + expect(event.get("message")).to eql [ "ohayo" ] end - expect(event).to be_a(LogStash::Event) - expect(event.get("message")).to eql [ "ohayo" ] - 
end + context 'when a target is set' do + let(:config) do + %q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + target => "[@metadata][_source]" + } + } + ] + end + it 'creates the event using the target' do + event = input(config) do |pipeline, queue| + queue.pop + end + + expect(event).to be_a(LogStash::Event) + puts event.to_hash_with_metadata + expect(event.get("[@metadata][_source][message]")).to eql [ "ohayo" ] + end + end + end # This spec is an adapter-spec, ensuring that we send the right sequence of messages to our Elasticsearch Client # to support sliced scrolling. The underlying implementation will spawn its own threads to consume, so we must be From bbe3ddc3f00a47467fea7e6258503a55172daf4e Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Tue, 15 Dec 2020 19:06:01 -0800 Subject: [PATCH 124/207] bump to 4.9.0 (#144) --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 6585eeee..2f7e52f3 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.8.3' + s.version = '4.9.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 2f1bd7ed35f9a0a2c7e881226901c813b52398d0 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Mon, 4 Jan 2021 12:49:18 +0000 Subject: [PATCH 125/207] [skip ci] update travis ci badge from .org to .com --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4551c4ed..c6a17d4d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Logstash Plugin [![Gem Version](https://badge.fury.io/rb/logstash-input-elasticsearch.svg)](https://badge.fury.io/rb/logstash-input-elasticsearch) -[![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-elasticsearch.svg)](https://travis-ci.org/logstash-plugins/logstash-input-elasticsearch) +[![Travis Build Status](https://travis-ci.com/logstash-plugins/logstash-input-elasticsearch.svg)](https://travis-ci.com/logstash-plugins/logstash-input-elasticsearch) This is a plugin for [Logstash](https://github.com/elastic/logstash). From ae4706fadfa0db85d8ceaa1eb5fa938af60eb6ca Mon Sep 17 00:00:00 2001 From: Mike Powers Date: Thu, 7 Jan 2021 19:14:57 -0500 Subject: [PATCH 126/207] Added missing quote to docinfo_fields example. 
(#145) --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 792a7fdc..0208a227 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -217,7 +217,7 @@ Example elasticsearch { docinfo => true add_field => { - identifier => %{[@metadata][_index]}:%{[@metadata][_type]}:%{[@metadata][_id]}" + identifier => "%{[@metadata][_index]}:%{[@metadata][_type]}:%{[@metadata][_id]}" } } } From 9ce6fb6c34a4103129736dafb1066ea91ac77265 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Tue, 12 Jan 2021 13:21:04 -0500 Subject: [PATCH 127/207] Doc: Replace cross doc links with attributes (#143) Add attribute and fix linking Update changelog and bump to v.4.9.1 --- CHANGELOG.md | 4 ++++ docs/index.asciidoc | 26 +++++++++++--------------- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b21ef931..5cb2cd96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 4.9.1 + - [DOC] Replaced hard-coded links with shared attributes [#143](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/143) + - [DOC] Added missing quote to docinfo_fields example [#145](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/145) + ## 4.9.0 - Added `target` option, allowing the hit's source to target a specific field instead of being expanded at the root of the event. This allows the input to play nicer with the Elastic Common Schema when the input does not follow the schema. [#117](https://github.com/logstash-plugins/logstash-input-elasticsearch/issues/117) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 0208a227..42ea523f 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -111,7 +111,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> | <>|No -| <> | https://www.elastic.co/guide/en/logstash/master/field-references-deepdive.html[field reference] | No +| <> | {logstash-ref}/field-references-deepdive.html[field reference] | No | <> |<>|No |======================================================================= @@ -130,7 +130,7 @@ Authenticate using Elasticsearch API key. Note that this option also requires en Format is `id:api_key` where `id` and `api_key` are as returned by the Elasticsearch -https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html[Create +{ref}/security-api-create-api-key.html[Create API key API]. [id="plugins-{type}s-{plugin}-ca_file"] @@ -150,8 +150,7 @@ SSL Certificate Authority file in PEM encoded format, must also include any chai Cloud authentication string (":" format) is an alternative for the `user`/`password` pair. For more info, check out the -https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html[Logstash-to-Cloud -documentation] +{logstash-ref}/connecting-to-cloud.html[Logstash-to-Cloud documentation]. [id="plugins-{type}s-{plugin}-cloud_id"] ===== `cloud_id` @@ -162,8 +161,7 @@ documentation] Cloud ID, from the Elastic Cloud web console. If set `hosts` should not be used. For more info, check out the -https://www.elastic.co/guide/en/logstash/current/connecting-to-cloud.html[Logstash-to-Cloud -documentation] +{logstash-ref}/connecting-to-cloud.html[Logstash-to-Cloud documentation]. 
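+
+For illustration, a sketch combining both cloud settings (the values below are
+placeholders, not part of this patch; take the real ID from the Elastic Cloud
+web console):
+
+[source,ruby]
+    input {
+      elasticsearch {
+        cloud_id   => "label:dXMtZXhhbXBsZQ=="   # placeholder Cloud ID
+        cloud_auth => "elastic:password"         # placeholder credentials
+      }
+    }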
[id="plugins-{type}s-{plugin}-connect_timeout_seconds"] ===== `connect_timeout_seconds` @@ -260,10 +258,9 @@ can be either IP, HOST, IP:port, or HOST:port. The port defaults to * Value type is <> * Default value is `"logstash-*"` -The index or alias to search. See -https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html[Multi Indices documentation] -in the Elasticsearch documentation for more information on how to reference -multiple indices. +The index or alias to search. See {ref}/multi-index.html[Multi Indices +documentation] in the Elasticsearch documentation for more information on how to +reference multiple indices. [id="plugins-{type}s-{plugin}-password"] @@ -292,9 +289,8 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`. * Value type is <> * Default value is `'{ "sort": [ "_doc" ] }'` -The query to be executed. Read the -https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html[Elasticsearch query DSL documentation] -for more information. +The query to be executed. Read the {ref}/query-dsl.html[Elasticsearch query DSL +documentation] for more information. [id="plugins-{type}s-{plugin}-request_timeout_seconds"] ===== `request_timeout_seconds` @@ -345,7 +341,7 @@ This allows you to set the maximum number of hits returned per scroll. In some cases, it is possible to improve overall throughput by consuming multiple distinct slices of a query simultaneously using -https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html#slice-scroll[sliced scrolls], +{ref}/paginate-search-results.html#slice-scroll[sliced scrolls], especially if the pipeline is spending significant time waiting on Elasticsearch to provide results. @@ -382,7 +378,7 @@ Socket timeouts usually occur while waiting for the first byte of a response, su [id="plugins-{type}s-{plugin}-target"] ===== `target` -* Value type is https://www.elastic.co/guide/en/logstash/master/field-references-deepdive.html[field reference] +* Value type is {logstash-ref}/field-references-deepdive.html[field reference] * There is no default value for this setting. Without a `target`, events are created from each hit's `_source` at the root level. diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 2f7e52f3..3742c207 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.9.0' + s.version = '4.9.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From f251ba9c85118bfc7280a3ca3598c3016da6c281 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Wed, 3 Mar 2021 10:53:00 +0000 Subject: [PATCH 128/207] [tests] change super to super() - jruby/jruby#6571 --- spec/inputs/elasticsearch_spec.rb | 24 +++++++++---------- spec/inputs/integration/elasticsearch_spec.rb | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index f90934ad..80452e88 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -166,7 +166,7 @@ class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch end context 'without slices directive' do - let(:config) { super.tap { |h| h.delete('slices') } } + let(:config) { super().tap { |h| h.delete('slices') } } it 'runs just one slice' do expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) expect(Thread).to_not receive(:new) @@ -563,7 +563,7 @@ def synchronize_method!(object, method_name) 'sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==' end - let(:config) { super.merge({ 'cloud_id' => valid_cloud_id }) } + let(:config) { super().merge({ 'cloud_id' => valid_cloud_id }) } it "should set host(s)" do plugin.register @@ -578,7 +578,7 @@ def synchronize_method!(object, method_name) end context 'invalid' do - let(:config) { super.merge({ 'cloud_id' => 'invalid:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlv' }) } + let(:config) { super().merge({ 'cloud_id' => 'invalid:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlv' }) } it "should fail" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_id.*? is invalid/ @@ -586,7 +586,7 @@ def synchronize_method!(object, method_name) end context 'hosts also set' do - let(:config) { super.merge({ 'cloud_id' => valid_cloud_id, 'hosts' => [ 'localhost:9200' ] }) } + let(:config) { super().merge({ 'cloud_id' => valid_cloud_id, 'hosts' => [ 'localhost:9200' ] }) } it "should fail" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_id and hosts/ @@ -595,7 +595,7 @@ def synchronize_method!(object, method_name) end if LOGSTASH_VERSION > '6.0' describe "cloud.auth" do - let(:config) { super.merge({ 'cloud_auth' => LogStash::Util::Password.new('elastic:my-passwd-00') }) } + let(:config) { super().merge({ 'cloud_auth' => LogStash::Util::Password.new('elastic:my-passwd-00') }) } it "should set authorization" do plugin.register @@ -606,7 +606,7 @@ def synchronize_method!(object, method_name) end context 'invalid' do - let(:config) { super.merge({ 'cloud_auth' => 'invalid-format' }) } + let(:config) { super().merge({ 'cloud_auth' => 'invalid-format' }) } it "should fail" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /cloud_auth.*? 
format/ @@ -614,7 +614,7 @@ def synchronize_method!(object, method_name) end context 'user also set' do - let(:config) { super.merge({ 'cloud_auth' => 'elastic:my-passwd-00', 'user' => 'another' }) } + let(:config) { super().merge({ 'cloud_auth' => 'elastic:my-passwd-00', 'user' => 'another' }) } it "should fail" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ @@ -624,7 +624,7 @@ def synchronize_method!(object, method_name) describe "api_key" do context "without ssl" do - let(:config) { super.merge({ 'api_key' => LogStash::Util::Password.new('foo:bar') }) } + let(:config) { super().merge({ 'api_key' => LogStash::Util::Password.new('foo:bar') }) } it "should fail" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /api_key authentication requires SSL\/TLS/ @@ -632,7 +632,7 @@ def synchronize_method!(object, method_name) end context "with ssl" do - let(:config) { super.merge({ 'api_key' => LogStash::Util::Password.new('foo:bar'), "ssl" => true }) } + let(:config) { super().merge({ 'api_key' => LogStash::Util::Password.new('foo:bar'), "ssl" => true }) } it "should set authorization" do plugin.register @@ -643,7 +643,7 @@ def synchronize_method!(object, method_name) end context 'user also set' do - let(:config) { super.merge({ 'api_key' => 'foo:bar', 'user' => 'another' }) } + let(:config) { super().merge({ 'api_key' => 'foo:bar', 'user' => 'another' }) } it "should fail" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ @@ -653,7 +653,7 @@ def synchronize_method!(object, method_name) end if LOGSTASH_VERSION > '6.0' describe "proxy" do - let(:config) { super.merge({ 'proxy' => 'http://localhost:1234/' }) } + let(:config) { super().merge({ 'proxy' => 'http://localhost:1234/' }) } it "should set proxy" do plugin.register @@ -664,7 +664,7 @@ def synchronize_method!(object, method_name) end context 'invalid' do - let(:config) { super.merge({ 'proxy' => '${A_MISSING_ENV_VAR:}' }) } + let(:config) { super().merge({ 'proxy' => '${A_MISSING_ENV_VAR:}' }) } it "should not set proxy" do plugin.register diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index 107c24d7..059cf7de 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -51,7 +51,7 @@ let(:password) { 'abc123' } let(:ca_file) { "spec/fixtures/test_certs/test.crt" } let(:client_options) {{:ca_file => ca_file, :user => user, :password => password}} - let(:config) { super.merge({ + let(:config) { super().merge({ 'user' => user, 'password' => password, 'ssl' => true, From 13082581a29d20775eaf57148aa71a510c053b19 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Wed, 4 Aug 2021 15:47:38 +0200 Subject: [PATCH 129/207] Test: fix compat issue with latest client (#152) --- spec/inputs/elasticsearch_spec.rb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 80452e88..492d9b47 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -568,13 +568,13 @@ def synchronize_method!(object, method_name) it "should set host(s)" do plugin.register client = plugin.send(:client) - expect( client.transport.hosts ).to eql [{ - :scheme => "https", - :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", - 
:port => 9243, - :path => "", - :protocol => "https" - }] + expect( client.transport.instance_variable_get(:@hosts) ).to eql [{ + :scheme => "https", + :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", + :port => 9243, + :path => "", + :protocol => "https" + }] end context 'invalid' do @@ -600,7 +600,7 @@ def synchronize_method!(object, method_name) it "should set authorization" do plugin.register client = plugin.send(:client) - auth_header = client.transport.options[:transport_options][:headers][:Authorization] + auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers][:Authorization] expect( auth_header ).to eql "Basic #{Base64.encode64('elastic:my-passwd-00').rstrip}" end @@ -637,7 +637,7 @@ def synchronize_method!(object, method_name) it "should set authorization" do plugin.register client = plugin.send(:client) - auth_header = client.transport.options[:transport_options][:headers][:Authorization] + auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers][:Authorization] expect( auth_header ).to eql "ApiKey #{Base64.strict_encode64('foo:bar')}" end @@ -658,7 +658,7 @@ def synchronize_method!(object, method_name) it "should set proxy" do plugin.register client = plugin.send(:client) - proxy = client.transport.options[:transport_options][:proxy] + proxy = client.transport.instance_variable_get(:@options)[:transport_options][:proxy] expect( proxy ).to eql "http://localhost:1234/" end @@ -670,7 +670,7 @@ def synchronize_method!(object, method_name) plugin.register client = plugin.send(:client) - expect( client.transport.options[:transport_options] ).to_not include(:proxy) + expect( client.transport.instance_variable_get(:@options)[:transport_options] ).to_not include(:proxy) end end end From bef88cc2dc95718fc376441e933d142ff7cb5639 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Thu, 12 Aug 2021 15:51:08 +0200 Subject: [PATCH 130/207] Fix: client gem 7.x compatibility (#153) + Refactor: unnecessary var declaration warning + CI: refactor ES docker-compose - run entry-point + CI: force SECURE_INTEGRATION to require auth + CI: more (explicit) 7.x targets (for the future) + Test: use Faraday transport for ES setup + Test: regenerate certs + restrain verification mode --- .ci/Dockerfile.elasticsearch | 9 ++-- .ci/docker-compose.override.yml | 11 +++- .ci/elasticsearch-run.sh | 4 -- .travis.yml | 5 ++ CHANGELOG.md | 5 ++ lib/logstash/inputs/elasticsearch.rb | 13 +++-- ...csearch_transport_connections_selector.rb} | 15 +++--- ..._elasticsearch_transport_http_manticore.rb | 33 ++++++++++++ logstash-input-elasticsearch.gemspec | 8 ++- spec/es_helper.rb | 33 ++++++------ spec/fixtures/test_certs/ca.crt | 20 ++++++++ spec/fixtures/test_certs/ca.key | 27 ++++++++++ spec/fixtures/test_certs/ca/ca.crt | 32 ------------ spec/fixtures/test_certs/ca/ca.key | 51 ------------------- spec/fixtures/test_certs/es.crt | 20 ++++++++ spec/fixtures/test_certs/es.key | 27 ++++++++++ spec/fixtures/test_certs/test.crt | 36 ------------- spec/fixtures/test_certs/test.key | 51 ------------------- spec/inputs/elasticsearch_spec.rb | 4 +- spec/inputs/integration/elasticsearch_spec.rb | 36 ++++++++----- 20 files changed, 213 insertions(+), 227 deletions(-) delete mode 100755 .ci/elasticsearch-run.sh rename lib/logstash/inputs/{patch.rb => elasticsearch/patches/_elasticsearch_transport_connections_selector.rb} (72%) create mode 100644 lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb 
create mode 100644 spec/fixtures/test_certs/ca.crt create mode 100644 spec/fixtures/test_certs/ca.key delete mode 100644 spec/fixtures/test_certs/ca/ca.crt delete mode 100755 spec/fixtures/test_certs/ca/ca.key create mode 100644 spec/fixtures/test_certs/es.crt create mode 100644 spec/fixtures/test_certs/es.key delete mode 100644 spec/fixtures/test_certs/test.crt delete mode 100644 spec/fixtures/test_certs/test.key diff --git a/.ci/Dockerfile.elasticsearch b/.ci/Dockerfile.elasticsearch index afb65800..2756c149 100644 --- a/.ci/Dockerfile.elasticsearch +++ b/.ci/Dockerfile.elasticsearch @@ -9,11 +9,12 @@ ARG SECURE_INTEGRATION RUN rm -f $es_path/config/scripts COPY --chown=elasticsearch:elasticsearch spec/fixtures/test_certs/* $es_path/config/test_certs/ -COPY --chown=elasticsearch:elasticsearch .ci/elasticsearch-run.sh $es_path/ -RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.enabled: $SECURE_INTEGRATION" >> $es_yml; fi -RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/test.key" >> $es_yml; fi -RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/test.crt" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.enabled: true" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/es.key" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/es.crt" >> $es_yml; fi RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.verification_mode: certificate" >> $es_yml; fi + RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd simpleuser -p abc123 -r superuser; fi RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd 'f@ncyuser' -p 'ab%12#' -r superuser; fi diff --git a/.ci/docker-compose.override.yml b/.ci/docker-compose.override.yml index 7b364c3c..1d05943b 100644 --- a/.ci/docker-compose.override.yml +++ b/.ci/docker-compose.override.yml @@ -16,8 +16,17 @@ services: - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION - INTEGRATION=${INTEGRATION:-false} - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false} - command: /usr/share/elasticsearch/elasticsearch-run.sh + environment: + #- ELASTIC_PASSWORD=$ELASTIC_PASSWORD a user is setup manually + - xpack.security.enabled=${SECURE_INTEGRATION:-false} + - bootstrap.memory_lock=true + - discovery.type=single-node + - ES_JAVA_OPTS=-Xms640m -Xmx640m tty: true + ulimits: + memlock: + soft: -1 + hard: -1 ports: - "9200:9200" user: elasticsearch diff --git a/.ci/elasticsearch-run.sh b/.ci/elasticsearch-run.sh deleted file mode 100755 index 9df8c88d..00000000 --- a/.ci/elasticsearch-run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -ex - -/usr/share/elasticsearch/bin/elasticsearch -Ediscovery.type=single-node diff --git a/.travis.yml b/.travis.yml index 4c3b3f8e..1a29639f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,9 @@ import: - logstash-plugins/.ci:travis/travis.yml@1.x +before_install: +- sudo sysctl -w vm.max_map_count=262144 # due ES bootstrap requirements + env: - INTEGRATION=false ELASTIC_STACK_VERSION=6.x - INTEGRATION=false 
ELASTIC_STACK_VERSION=7.x
@@ -11,3 +14,5 @@ env:
 - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true
 - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true
 - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x
+- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.14.0
+- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.9.3
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5cb2cd96..68dbc30f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+## 4.9.2
+  - Fix: a regression (in LS 7.14.0) where, due to the elasticsearch client update (from 5.0.5 to 7.5.0), the `Authorization`
+    header isn't passed; this leads to the plugin not being able to leverage `user`/`password` credentials set by the user.
+    [#153](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/153)
+
 ## 4.9.1
   - [DOC] Replaced hard-coded links with shared attributes [#143](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/143)
   - [DOC] Added missing quote to docinfo_fields example [#145](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/145)
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index 15e7567f..c88371ef 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -5,8 +5,11 @@
 require "logstash/util/safe_uri"
 require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter'
 require "base64"
-require_relative "patch"
+require "elasticsearch"
+require "elasticsearch/transport/transport/http/manticore"
+require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore"
+require_relative "elasticsearch/patches/_elasticsearch_transport_connections_selector"
 
 # .Compatibility Note
 # [NOTE]
@@ -182,9 +185,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   config :target, :validate => :field_reference
 
   def register
-    require "elasticsearch"
     require "rufus/scheduler"
-    require "elasticsearch/transport/transport/http/manticore"
 
     @options = {
       :index => @index,
@@ -225,7 +226,6 @@ def register
 
   end
 
-
   def run(output_queue)
     if @schedule
       @scheduler = Rufus::Scheduler.new(:max_work_threads => 1)
@@ -267,7 +267,6 @@ def do_run_slice(output_queue, slice_id=nil)
 
     logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil?
- scroll_id = nil begin r = search_request(slice_options) @@ -388,14 +387,14 @@ def setup_basic_auth(user, password) return {} unless user && password && password.value token = ::Base64.strict_encode64("#{user}:#{password.value}") - { Authorization: "Basic #{token}" } + { 'Authorization' => "Basic #{token}" } end def setup_api_key(api_key) return {} unless (api_key && api_key.value) token = ::Base64.strict_encode64(api_key.value) - { Authorization: "ApiKey #{token}" } + { 'Authorization' => "ApiKey #{token}" } end def fill_user_password_from_cloud_auth diff --git a/lib/logstash/inputs/patch.rb b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb similarity index 72% rename from lib/logstash/inputs/patch.rb rename to lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb index 098e68a8..b0d87869 100644 --- a/lib/logstash/inputs/patch.rb +++ b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_connections_selector.rb @@ -1,10 +1,13 @@ -if Gem.loaded_specs['elasticsearch-transport'].version >= Gem::Version.new("7.2.0") +require 'elasticsearch' +require 'elasticsearch/transport/transport/connections/selector' + +if Gem.loaded_specs['elasticsearch-transport'].version < Gem::Version.new("7.2.0") # elasticsearch-transport versions prior to 7.2.0 suffered of a race condition on accessing - # the connection pool. This issue was fixed with https://github.com/elastic/elasticsearch-ruby/commit/15f9d78591a6e8823948494d94b15b0ca38819d1 - # This plugin, at the moment, is forced to use v5.x so we have to monkey patch the gem. When this requirement - # ceases, this patch could be removed. - puts "WARN remove the patch code into logstash-input-elasticsearch plugin" -else + # the connection pool. This issue was fixed (in 7.2.0) with + # https://github.com/elastic/elasticsearch-ruby/commit/15f9d78591a6e8823948494d94b15b0ca38819d1 + # + # This plugin, at the moment, is using elasticsearch >= 5.0.5 + # When this requirement ceases, this patch could be removed. module Elasticsearch module Transport module Transport diff --git a/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb new file mode 100644 index 00000000..c7c712e0 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb @@ -0,0 +1,33 @@ +# encoding: utf-8 +require "elasticsearch" +require "elasticsearch/transport/transport/http/manticore" + +es_client_version = Gem.loaded_specs['elasticsearch-transport'].version +if es_client_version >= Gem::Version.new('7.2') && es_client_version < Gem::Version.new('7.16') + # elasticsearch-transport 7.2.0 - 7.14.0 had a bug where setting http headers + # ES::Client.new ..., transport_options: { headers: { 'Authorization' => ... } } + # would be lost https://github.com/elastic/elasticsearch-ruby/issues/1428 + # + # NOTE: needs to be idempotent as filter ES plugin might apply the same patch! + # + # @private + module Elasticsearch + module Transport + module Transport + module HTTP + class Manticore + + def apply_headers(request_options, options) + headers = (options && options[:headers]) || {} + headers[CONTENT_TYPE_STR] = find_value(headers, CONTENT_TYPE_REGEX) || DEFAULT_CONTENT_TYPE + headers[USER_AGENT_STR] = find_value(headers, USER_AGENT_REGEX) || user_agent_header + headers[ACCEPT_ENCODING] = GZIP if use_compression? 
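+          # NOTE: the unpatched method replaced request_options[:headers] wholesale,
+          # so headers set by the caller (e.g. 'Authorization') were dropped — see the
+          # issue linked above; merging into the existing hash preserves them.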
+ (request_options[:headers] ||= {}).merge!(headers) # this line was changed + end + + end + end + end + end + end +end \ No newline at end of file diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 3742c207..9642c6f1 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.9.1' + s.version = '4.9.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -23,17 +23,15 @@ Gem::Specification.new do |s| s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" - s.add_runtime_dependency 'elasticsearch', '>= 5.0.3' + s.add_runtime_dependency 'elasticsearch', '>= 5.0.5' # LS >= 6.7 and < 7.14 all used version 5.0.5 s.add_runtime_dependency 'logstash-codec-json' - s.add_runtime_dependency 'logstash-codec-plain' s.add_runtime_dependency 'sequel' s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' s.add_runtime_dependency 'rufus-scheduler' s.add_runtime_dependency 'manticore', "~> 0.6" - s.add_runtime_dependency 'faraday', "~> 0.15.4" - + s.add_development_dependency 'faraday', "~> 0.15.4" s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'timecop' end diff --git a/spec/es_helper.rb b/spec/es_helper.rb index 9d3e81ef..c3861211 100644 --- a/spec/es_helper.rb +++ b/spec/es_helper.rb @@ -1,30 +1,31 @@ module ESHelper def self.get_host_port - return "elasticsearch:9200" if ENV["INTEGRATION"] == "true" || ENV["SECURE_INTEGRATION"] == "true" - raise "This setting is only used for integration tests" + if ENV["INTEGRATION"] == "true" || ENV["SECURE_INTEGRATION"] == "true" + "elasticsearch:9200" + else + "localhost:9200" # for local running integration specs outside docker + end end - def self.get_client(options = {}) - ssl_options = {} - hosts = [get_host_port] + def self.get_client(options) + require 'elasticsearch/transport/transport/http/faraday' # supports user/password options + host, port = get_host_port.split(':') + host_opts = { host: host, port: port, scheme: 'http' } + ssl_opts = {} if options[:ca_file] - ssl_options = { :ssl => true, :ca_file => options[:ca_file] } - hosts.map! 
do |h| - host, port = h.split(":") - { :host => host, :scheme => 'https', :port => port } - end + ssl_opts = { ca_file: options[:ca_file], version: 'TLSv1.2', verify: false } + host_opts[:scheme] = 'https' end - transport_options = {} - if options[:user] && options[:password] - token = Base64.strict_encode64("#{options[:user]}:#{options[:password]}") - transport_options[:headers] = { :Authorization => "Basic #{token}" } + host_opts[:user] = options[:user] + host_opts[:password] = options[:password] end - @client = Elasticsearch::Client.new(:hosts => hosts, :transport_options => transport_options, :ssl => ssl_options, - :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore) + Elasticsearch::Client.new(hosts: [host_opts], + transport_options: { ssl: ssl_opts }, + transport_class: Elasticsearch::Transport::Transport::HTTP::Faraday) end def self.doc_type diff --git a/spec/fixtures/test_certs/ca.crt b/spec/fixtures/test_certs/ca.crt new file mode 100644 index 00000000..e9312d7c --- /dev/null +++ b/spec/fixtures/test_certs/ca.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDSTCCAjGgAwIBAgIUUcAg9c8B8jiliCkOEJyqoAHrmccwDQYJKoZIhvcNAQEL +BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l +cmF0ZWQgQ0EwHhcNMjEwODEyMDUxNDU1WhcNMjQwODExMDUxNDU1WjA0MTIwMAYD +VQQDEylFbGFzdGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTCC +ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAK1HuusRuGNsztd4EQvqwcMr +8XvnNNaalerpMOorCGySEFrNf0HxDIVMGMCrOv1F8SvlcGq3XANs2MJ4F2xhhLZr +PpqVHx+QnSZ66lu5R89QVSuMh/dCMxhNBlOA/dDlvy+EJBl9H791UGy/ChhSgaBd +OKVyGkhjErRTeMIq7rR7UG6GL/fV+JGy41UiLrm1KQP7/XVD9UzZfGq/hylFkTPe +oox5BUxdxUdDZ2creOID+agtIYuJVIkelKPQ+ljBY3kWBRexqJQsvyNUs1gZpjpz +YUCzuVcXDRuJXYQXGqWXhsBPfJv+ZcSyMIBUfWT/G13cWU1iwufPy0NjajowPZsC +AwEAAaNTMFEwHQYDVR0OBBYEFMgkye5+2l+TE0I6RsXRHjGBwpBGMB8GA1UdIwQY +MBaAFMgkye5+2l+TE0I6RsXRHjGBwpBGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI +hvcNAQELBQADggEBAIgtJW8sy5lBpzPRHkmWSS/SCZIPsABW+cHqQ3e0udrI3CLB +G9n7yqAPWOBTbdqC2GM8dvAS/Twx4Bub/lWr84dFCu+t0mQq4l5kpJMVRS0KKXPL +DwJbUN3oPNYy4uPn5Xi+XY3BYFce5vwJUsqIxeAbIOxVTNx++k5DFnB0ESAM23QL +sgUZl7xl3/DkdO4oHj30gmTRW9bjCJ6umnHIiO3JoJatrprurUIt80vHC4Ndft36 +NBQ9mZpequ4RYjpSZNLcVsxyFAYwEY4g8MvH0MoMo2RRLfehmMCzXnI/Wh2qEyYz +emHprBii/5y1HieKXlX9CZRb5qEPHckDVXW3znw= +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/ca.key b/spec/fixtures/test_certs/ca.key new file mode 100644 index 00000000..ace213f3 --- /dev/null +++ b/spec/fixtures/test_certs/ca.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEArUe66xG4Y2zO13gRC+rBwyvxe+c01pqV6ukw6isIbJIQWs1/ +QfEMhUwYwKs6/UXxK+VwardcA2zYwngXbGGEtms+mpUfH5CdJnrqW7lHz1BVK4yH +90IzGE0GU4D90OW/L4QkGX0fv3VQbL8KGFKBoF04pXIaSGMStFN4wirutHtQboYv +99X4kbLjVSIuubUpA/v9dUP1TNl8ar+HKUWRM96ijHkFTF3FR0NnZyt44gP5qC0h +i4lUiR6Uo9D6WMFjeRYFF7GolCy/I1SzWBmmOnNhQLO5VxcNG4ldhBcapZeGwE98 +m/5lxLIwgFR9ZP8bXdxZTWLC58/LQ2NqOjA9mwIDAQABAoIBABmBC0P6Ebegljkk +lO26GdbOKvbfqulDS3mN5QMyXkUMopea03YzMnKUJriE+2O33a1mUcuDPWnLpYPK +BTiQieYHlulNtY0Bzf+R69igRq9+1WpZftGnzrlu7NVxkOokRqWJv3546ilV7QZ0 +f9ngmu+tiN7hEnlBC8m613VMuGGb3czwbCizEVZxlZX0Dk2GExbH7Yf3NNs/aOP/ +8x6CqgL+rhrtOQ80xwRrOlEF8oSSjXCzypa3nFv21YO3J2lVo4BoIwnHgOzyz46A +b37gekqXXajIYQ0HAB+NDgVoCRFFJ7Xe16mgB3DpyUpUJzwiMedJkeQ0TprIownQ ++1mPe9ECgYEA/K4jc0trr3sk8KtcZjOYdpvwrhEqSSGEPeGfFujZaKOb8PZ8PX6j +MbCTV12nEgm8FEhZQ3azxLnO17gbJ2A+Ksm/IIwnTWlqvvMZD5qTQ7L3qZuCtbWQ ++EGC/H1SDjhiwvjHcXP61/tYL/peApBSoj0L4kC+U/VaNyvicudKk08CgYEAr46J +4VJBJfZ4ZaUBRy53+fy+mknOfaj2wo8MnD3u+/x4YWTapqvDOPN2nJVtKlIsxbS4 
+qCO+fzUV17YHlsQmGULNbtFuXWJkP/RcLVbe8VYg/6tmk0dJwNAe90flagX2KJov +8eDX129nNpuUqrNNWsfeLmPmH6vUzpKlga+1zfUCgYBrbUHHJ96dmbZn2AMNtIvy +iXP3HXcj5msJwB3aKJ8eHMkU1kaWAnwxiQfrkfaQ9bCP0v6YbyQY1IJ7NlvdDs7/ +dAydMtkW0WW/zyztdGN92d3vrx0QUiRTV87vt/wl7ZUXnZt1wcB5CPRCWaiUYHWx +YlDmHW6N1XdIk5DQF0OegwKBgEt7S8k3Zo9+A5IgegYy8p7njsQjy8a3qTFJ9DAR +aPmrOc8WX/SdkVihRXRZwxAZOOrgoyyYAcYL+xI+T9EBESh3UoC9R2ibb2MYG7Ha +0gyN7a4/8eCNHCbs1QOZRAhr+8TFVqv28pbMbWJLToZ+hVns6Zikl0MyzFLtNoAm +HlMpAoGBAIOkqnwwuRKhWprL59sdcJfWY26os9nvuDV4LoKFNEFLJhj2AA2/3UlV +v85gqNSxnMNlHLZC9l2HZ3mKv/mfx1aikmFvyhJAnk5u0f9KkexmCPLjQzS5q3ba +yFuxK2DXwN4x46RgQPFlLjOTCX0BG6rkEu4JdonF8ETSjoCtGEU8 +-----END RSA PRIVATE KEY----- diff --git a/spec/fixtures/test_certs/ca/ca.crt b/spec/fixtures/test_certs/ca/ca.crt deleted file mode 100644 index d3a1abb6..00000000 --- a/spec/fixtures/test_certs/ca/ca.crt +++ /dev/null @@ -1,32 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIFeTCCA2GgAwIBAgIUU+VHJ91JsLLA1GJYC+UchNfw3hEwDQYJKoZIhvcNAQEL -BQAwTDELMAkGA1UEBhMCUFQxCzAJBgNVBAgMAk5BMQ8wDQYDVQQHDAZMaXNib24x -DjAMBgNVBAoMBU15TGFiMQ8wDQYDVQQDDAZSb290Q0EwHhcNMTkwNzE1MTMxMTI5 -WhcNMjQwNzE0MTMxMTI5WjBMMQswCQYDVQQGEwJQVDELMAkGA1UECAwCTkExDzAN -BgNVBAcMBkxpc2JvbjEOMAwGA1UECgwFTXlMYWIxDzANBgNVBAMMBlJvb3RDQTCC -AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMtTMqAWuH17b9XqPa5L3HNq -gnZ958+gvcOt7Q/sOEvcDQJgkzZ+Gywh5er5JF2iomYOHiD5JncYr4YmRQKuYfD6 -B1WI5FuQthD/OlA1/RHqtbY27J33SaO66ro8gal7vjHrXKQkefVYRwdfO6DqqbhV -6L4sMiy8FzQ55TMpoM35cWuvoAMxvSQqGZ4pYYKnfNSGhzHvssfNS1xu/Lwb7Vju -4jPhp+43BkGwEimI5km7jNC1nwjiHtxDsY/s93AKa/vLktXKUK5nA3jjJOhAbRTV -nbOAgxFt0YbX98xW/aUqscgBUVs9J/MyTRMwVKJ7Vsmth1PdJQksUASuzESlSPl0 -9dMjTQ+MXzJDt0JvX8SIJPmbBng78MSaCUhpOZiii1l2mBfPWejx20I/SMCUNmzb -wm2w9JD50Jv2iX4l4ge4H1CIK1/orW1pdY9xPL0uKYm6ADsDC0B8sGgNMBXeB6aL -ojY1/ITwmmfpfk9c/yWPfC7stHgCYRAv5MfGAsmv0/ya5VrWQGBJkFiYy1pon6nx -UjCbgn0RABojRoGdhhY3QDipgwmSgFZxr064RFr1bt/Ml3MJmPf535mSwPdk/j/z -w4IZTvlmwKW3FyMDhwYL/zX7J0c6MzMPLEdi73Qjzmr3ENIrir4O86wNz81YRfYk -g9ZX8yKJK9LBAUrYCjJ3AgMBAAGjUzBRMB0GA1UdDgQWBBShWnSceOrqYn9Qa4WG -dIrvKNs/KzAfBgNVHSMEGDAWgBShWnSceOrqYn9Qa4WGdIrvKNs/KzAPBgNVHRMB -Af8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQBRQK0m3t5h2Y3CUCJYLMiCUge4 -UOzvpCoawSXH1FP2ycA+P1bP8H8htjwvV334ZADlQrDQRu0hqa1T+DxwhLxNOxgE -1XCthN3TTyd3O1mT4NmT6mcn2wYSn/JC6fPwFcloX8BcUvxl+xwmOgL/pzgf1ekK -MVS0n+r3bzdFTgGnvsmxmPHe2bUhyXXqzQIx3ObSGtuKYUu7aZEysEtJhaR+vGTd -jjTOV2S71edVlKTxRLZpHgoTZpBL/phwRQ63vdef4ftNGs0glGDc0yqXGMxMALOl -Up7+H4HI99rldZcul6oZ+ORltt047Hk7ctWb20SqxEH9tGLXKm6hDEL9HzyFXeyJ -DAue1GF+3H0KvsjSs5XH7LHMuJDCuSP64+h9gzkI+q06oBNX/9pQyQaHj0K4don8 -lWOMLI4gQibV7R1Opt2feA8MwWxouP/yni8IX6sPePVQ+fLEk1C+Kg+x6k1yQHEM -36BEP6iYOYvqG0OIjMas2U7Yhn2wWlVm9It3WMyaW8ZPI8kwc3dx715dZuNg/zjd -rJS678BNBVxInc7dzpY6el0Lr70CGwiJpX/N9P1yiTFZ7GZm3Kax8QnTtvqXzRIy -sBgt8BVZHUe1lWFYlG+jlakiXqz752nmHuwif7iBI4iWzRmW2vYPfTEmYPRLZES2 -nIg9fQPvVw+fIHACZQ== ------END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/ca/ca.key b/spec/fixtures/test_certs/ca/ca.key deleted file mode 100755 index e39ef42a..00000000 --- a/spec/fixtures/test_certs/ca/ca.key +++ /dev/null @@ -1,51 +0,0 @@ ------BEGIN RSA PRIVATE KEY----- -MIIJKAIBAAKCAgEAy1MyoBa4fXtv1eo9rkvcc2qCdn3nz6C9w63tD+w4S9wNAmCT -Nn4bLCHl6vkkXaKiZg4eIPkmdxivhiZFAq5h8PoHVYjkW5C2EP86UDX9Eeq1tjbs -nfdJo7rqujyBqXu+MetcpCR59VhHB187oOqpuFXoviwyLLwXNDnlMymgzflxa6+g -AzG9JCoZnilhgqd81IaHMe+yx81LXG78vBvtWO7iM+Gn7jcGQbASKYjmSbuM0LWf -COIe3EOxj+z3cApr+8uS1cpQrmcDeOMk6EBtFNWds4CDEW3Rhtf3zFb9pSqxyAFR -Wz0n8zJNEzBUontWya2HU90lCSxQBK7MRKVI+XT10yNND4xfMkO3Qm9fxIgk+ZsG 
-eDvwxJoJSGk5mKKLWXaYF89Z6PHbQj9IwJQ2bNvCbbD0kPnQm/aJfiXiB7gfUIgr -X+itbWl1j3E8vS4piboAOwMLQHywaA0wFd4HpouiNjX8hPCaZ+l+T1z/JY98Luy0 -eAJhEC/kx8YCya/T/JrlWtZAYEmQWJjLWmifqfFSMJuCfREAGiNGgZ2GFjdAOKmD -CZKAVnGvTrhEWvVu38yXcwmY9/nfmZLA92T+P/PDghlO+WbApbcXIwOHBgv/Nfsn -RzozMw8sR2LvdCPOavcQ0iuKvg7zrA3PzVhF9iSD1lfzIokr0sEBStgKMncCAwEA -AQKCAgBVVGU6qk5i2xrkO5oHO+8YwOpfsBdJG7yIEsYamJhHveH3zW/6vpHIw7Eq -G8UXRtnA2svqKqXp9YI0Wns71NNlvoi1bO3pP6IpH/PpFb9PdaEtB3/mC5HsFNXN -svb3mecILC6E9In6XUHx5hWwQstXgTZcGVA1VfqnAGUgQ6goyTbAasRMkuM9+i0m -I1e47XGF/69dVatCDvZBpJKMn2vMlvR3sYw4fP8zMiFtLPb4mq1OKerEX6Fz7zTl -oh119+m5koXdEzso9jKO2UTz85XT2JKGcriO5/e3D4v/RcLNPk2+Ek+CavgJKGMQ -WogqaHjTyu+wUm7omqA6VuGDLZqh1r0xYR+EXVMAudLjy7/NtAaE4MVOqVRs4WVd -sGccyirkTosxlvK3/vTfsp0VQtreBbxO1maqR5od0aa36MrP4Sk5O07yB9GAthp8 -5qlqtiYaO2Hcq2KJjKPUGwXlAWFZtENQe+G/jy+gYVDwKRInK7f7HubZlAMwsq2S -LSjtgvhqayAMsa7HoeevSVPLVdFb1IVkIw2jgMhXRgxmKa8WzbAUs124f9ey9z81 -si7w+qpZHq9LGChBjweTbd0abCianyRGHZIlDBE43XEcs3easxuHM6eOoJz0B7aj -oCXBCo/6Zd0om4ll5jva2+VOH2wTkZk7OhPiGU2f4g7kTJNAAQKCAQEA7YT3UdjN -HybAD3c/a5Kh17R4zCvymQkUWBs80ZaV9LlCYgie6aWlY6e+9m6vpDhY8dJZd+Zm -hlAF3VitRLw3SQUEImEC1caS1q99o1eQxMGu+mk9OiibF9PzZwoPE6zt5EZ0I/Ha -ifmf0Jn3xLyWF4qOKvO3gbWLknirDKffzNnWtFr4SQlEwtOR4m7IFDEz7e7RoGlv -K1qEFyK1PCfR8GeVHXWkJ3udHJRIZlEtNNLkHzU4nCRRYTvQ4l67rD9Tj7CoLbH1 -2OGSSvAkg+1lTBBs6RXZvhVvLrJVtQTXR7Oi8Z3mi3iJu9oWYa/OFaJl4lAN9xTe -QY0u0J1+AS5qAQKCAQEA2yUhO3rC1A7qHgbY4dAsx8f8yy9D0yCrI9OLnPQNF3ws -4mC1fUS60+07u0FYkgU3zIDwdLj5nsxWjB4ciY4hCgwz7pNJWlowurrfTvQNlqvC -m+Jrt1HYoaV+73mSj+rGv89HXWBW2I/1ED37BRoNB/YIMd/MUL8h0ubt3LIVaRow -41DT3dM969zuw3Avpx1uXQdnijJ1kA3oHpJ756YLHri/Nv6K0hJmGAbMrHPRxuhY -hYrxPJPVlp5mWUIjNkKoaNl3du3a6iVSbf7W15LxhAHmkKozxnhqoMldI6C8R548 -IKGyW4wo3GQvcEGPhgGnz2lswmvtx/6cWMv81b7sdwKCAQAXiC3sqPshk/hBUAIz -iTKJqXKyX8RITzL5y7EJ3s56kYQ3LD16TpQFPJBZ3/t83KxLQRjmHSiZNAJwvKFm -BvO/Q0T2lv/t6B+SL47WCQ3pwHqyioyrX2yGPtSFp+R4gZCMJkLJcOPC+b1QsIBw -uHJyYKLXNJBAxJjQaS4hMdylcguokL66lbV/S/DPK0SdY4aOkzOnneqKtAwUMrcb -/6H4HHsUkRwzYTbepv5JTM+axS4evWofZiW96Ww3kUUsupVvzgPLiy8dTrPswsAL -ZhC8KYBw015gS8VZLgf5yEH/85c4MvmtZcLXnrLK+N0FHbLUajQH/8RJYFB8EK50 -NYIBAoIBAQCNO8/AIqz/uCEAew858U168BOm62492lcRvtvCqrLpSNkwiH1PH4V8 -4e7WDxZC/WPpw8u0niYaRr0cjqd7q4L1k8nAX2It/eRb4+XZX4aGbWn9xx3+xpvk -CeHV+rcPU5MFuVPYBSfTaGvbLObjcdemItVDN2XZQGVPJA92ZbtQwlVxmv0AgIzu -vrOOd3XusRQYlpYPRdfooF3RnjyGncea5BwwGDpliwALSg6MshQnqkSqxFIW5XwJ -F0sFCY/h/3HEKStKFZ85uhX21/+kbYqDtinfYCIALkkiGMSTGptdWMiNi0iEob8P -0u2T3lzeU6DQFrTGVIKpmxkuTUFEjEifAoIBAH4nyu4ei4u7sGUhSZ79egUbqBZR -pjYblM8NB5UOAVmmbaswCWozsnsaBdIgymeokQXDPqIOwadP8IgGrgIxS5phdgvE -CNepxQDoVTXYEecPjc0LL4Kb+urmJL7HEP9BteIkc+7l8b9USDhNlJeCXICoJKBu -bNxgm60ZuoObx7h5APq9wC4x8Xj7AxQKu57Ied/tRFPCHW4UbhZhJfrnS2xTTk0u -z71AS7akI/NPfm3nLviISZeDzTgYs6vLYr/j4JUlcw1z6UpX4DvNm/MULi7ItXP5 -yV2H8jpCdjAe+OoC3OHTuQ8FQR29y758HUY7iF8ruuqUSWxF7pfL/a27EMw= ------END RSA PRIVATE KEY----- diff --git a/spec/fixtures/test_certs/es.crt b/spec/fixtures/test_certs/es.crt new file mode 100644 index 00000000..228dccd3 --- /dev/null +++ b/spec/fixtures/test_certs/es.crt @@ -0,0 +1,20 @@ +-----BEGIN CERTIFICATE----- +MIIDNjCCAh6gAwIBAgIUF9wE+oqGSbm4UVn1y9gEjzyaJFswDQYJKoZIhvcNAQEL +BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l +cmF0ZWQgQ0EwHhcNMjEwODEyMDUxNTI3WhcNMjQwODExMDUxNTI3WjANMQswCQYD +VQQDEwJlczCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAK2S2by0lgyu +1JfgGgZ41PNXbH2qMPMzowguVVdtZ16WM0CaEG7lnLxmMcC+2Q7NnGuFnPAVQo9T +Q3bh7j+1PkCJVHUKZfJIeWtGc9+qXBcO1MhedfwM1osSa4bfwM85G+XKWbRNtmSt +CoUuKArIyZkzdBAAQLBoQyPf3DIza1Au4j9Hb3zrswD6e7n2PN4ffIyil1GFduLJ 
+2275qqFiOhkEDUhv7BKNftVBh/89O/5lSqAQGuQ1aDRr8TdHwhO71u4ZIU/Pn6yX +LGBWrQG53+qpdCsxGvJTfbtIEYUDTN83CirIxDKJgc1QXOEldylztHf4xnQ7ZarJ +tqF6pUzHbRsCAwEAAaNnMGUwHQYDVR0OBBYEFFQUK+6Cg2kExRj1xSDzEi4kkgKX +MB8GA1UdIwQYMBaAFMgkye5+2l+TE0I6RsXRHjGBwpBGMBgGA1UdEQQRMA+CDWVs +YXN0aWNzZWFyY2gwCQYDVR0TBAIwADANBgkqhkiG9w0BAQsFAAOCAQEAinaknZIc +7xtQNwUwa+kdET+I4lMz+TJw9vTjGKPJqe082n81ycKU5b+a/OndG90z+dTwhShW +f0oZdIe/1rDCdiRU4ceCZA4ybKrFDIbW8gOKZOx9rsgEx9XNELj4ocZTBqxjQmNE +Ho91fli5aEm0EL2vJgejh4hcfDeElQ6go9gtvAHQ57XEADQSenvt69jOICOupnS+ +LSjDVhv/VLi3CAip0B+lD5fX/DVQdrJ62eRGuQYxoouE3saCO58qUUrKB39yD9KA +qRA/sVxyLogxaU+5dLfc0NJdOqSzStxQ2vdMvAWo9tZZ2UBGFrk5SdwCQe7Yv5mX +qi02i4q6meHGcw== +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/es.key b/spec/fixtures/test_certs/es.key new file mode 100644 index 00000000..d283ded1 --- /dev/null +++ b/spec/fixtures/test_certs/es.key @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEArZLZvLSWDK7Ul+AaBnjU81dsfaow8zOjCC5VV21nXpYzQJoQ +buWcvGYxwL7ZDs2ca4Wc8BVCj1NDduHuP7U+QIlUdQpl8kh5a0Zz36pcFw7UyF51 +/AzWixJrht/Azzkb5cpZtE22ZK0KhS4oCsjJmTN0EABAsGhDI9/cMjNrUC7iP0dv +fOuzAPp7ufY83h98jKKXUYV24snbbvmqoWI6GQQNSG/sEo1+1UGH/z07/mVKoBAa +5DVoNGvxN0fCE7vW7hkhT8+frJcsYFatAbnf6ql0KzEa8lN9u0gRhQNM3zcKKsjE +MomBzVBc4SV3KXO0d/jGdDtlqsm2oXqlTMdtGwIDAQABAoIBAQCm/VBDz41ImG7p +yu3e6iMeFi7HW5SKdlRUS5dJbHT1uBWJAm/q8TbwvnUBVdsn9cKWY06QYDPQBjAy +0LxRSIKivjyl+aIJDZbbEUXrmk/M0zT9rHtgSc2isM8ITH6IHw5q7lmNMPLYOu6T +IMvfTDtADBOOTV/vF+/4NKf5GCUXVt1XTzLBFMK0p/ZoI7Fsw7fhH6FR12vk0xA4 +BEC4pwRbGfHo7P31ii0by8epkve93tF4IZuFmN92A84bN1z7Kc4TYaSbua2rgguz +FzMyWpsTxr363HzCK1xOJb6JyJOiXbq4+j2oqtne3GIvyozJeiyKRgjLIMoe/LV7 +fPPc5wlhAoGBAOD3z0JH2eyR/1RHILFsWInH2nDbKHHuCjhFIL2XloeXsJkiJZ95 +BpdjExMZCqD44tPNRW/GgWKwoVwltm6zB0aq0aW/OfOzw6fhKt1W+go47L7Tpwap +VQgy6BFXSueUKfQDlZEWV4E2gakf8vOl0/VRQExae/CeKf1suEedQaErAoGBAMWE +LOmNDEU2NFqghfNBAFYyFJst3YnBmSmlL7W22+OsfSK/PhxnJbuNHxMgxpg9rieW +tVyjuZRo/i7WLVm3uG+dK1RJ9t8Y6kpYkCRKpi9G8DBOj3PSulOybBr+fdRfW9mf +8UmqOjOkrhxXPkchc9TY4EM7/1XeKvEidlIp0gvRAoGAAurz4zYvW2QhXaR2hhaT +p2XSLXiKM8AUndo3rH3U0/lhrvrEZicZsMj2LF88xg20U27sIaD/eJo13Y4XqaPk +ykPY6D9srv574SeIeMpx/8PxPiBcoDd+BNc0L1VkgVBoouORAwq5I9HjKKBjdEmI +UDw3i0X5KYvDm6fXVAZ0HXUCgYBWc4To8KiXPqNpq2sVzrSkBaWJSmj2G7u7Q6b/ +RTs3is72v3gjHG6iiaE5URY7mnu4rjlRhAP9Vnsy6uHMrCJZEBTf/sPEYHZj9iGZ +EOduOAF3U1tsmaaebbDtm8hdhSOBvITy9kQlSIZAt1r17Ulytz5pj0AySFzJUIkz +a0SZkQKBgCWixtUxiK8PAdWhyS++90WJeJn8eqjuSAz+VMtFQFRRWDUbkiHvGMRu +o/Hhk6zS46gSF2Evb1d26uUEenXnJlIp6YWzb0DLPrfy5P53kPA6YEvYq5MSAg3l +DZOJUF+ko7cWXSZkeTIBH/jrGOdP4tTALZt6DNt+Gz7xwPO5tGgV +-----END RSA PRIVATE KEY----- diff --git a/spec/fixtures/test_certs/test.crt b/spec/fixtures/test_certs/test.crt deleted file mode 100644 index 11d4a238..00000000 --- a/spec/fixtures/test_certs/test.crt +++ /dev/null @@ -1,36 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIGQjCCBCqgAwIBAgIBAzANBgkqhkiG9w0BAQsFADBMMQswCQYDVQQGEwJQVDEL -MAkGA1UECAwCTkExDzANBgNVBAcMBkxpc2JvbjEOMAwGA1UECgwFTXlMYWIxDzAN -BgNVBAMMBlJvb3RDQTAeFw0xOTA3MTUxMzEzMDVaFw0yMjA0MTAxMzEzMDVaMFMx -CzAJBgNVBAYTAlBUMQswCQYDVQQIDAJOQTEPMA0GA1UEBwwGTGlzYm9uMQ4wDAYD -VQQKDAVNeUxhYjEWMBQGA1UEAwwNZWxhc3RpY3NlYXJjaDCCAiIwDQYJKoZIhvcN -AQEBBQADggIPADCCAgoCggIBAMYhP2zPOE3ke9naeK+cIPNV91htuoGGARs+mlY/ -IVxXSvau2ZZ94rkQR2xNL8TLijBNx46mU+kCniy8X5r+LX9seGqdBhhTh/tCJzh8 -MCzMt2JIijSjVyw28iiCb8/669LMTp5lFlRKajj11jlIpIm3o+OHqUzYwcSOw8og -p0A3nvAQ33Srghm/oAcT2umGrFyYXWT6PnGaEJRLUQn7LuHJnRLseCF2Cn/RzFK7 -/tiVVjImmQiVB3dE9fMR/pVJiO2v0COnWuG+/brXWrQIHk0AuD8pHc6Iw9iZODkc 
-Ao53B41qbvqcbdXFN5XfL4tb+lkBuLioCX7j9zR44awvuj9hKfuqFOFTUBZL2RjV -bFMKspGHnytQZF+a+mc5H33G9HiPP3jZE2JjrWlOay+j6ImylMgjcZmHAgaUe3ET -1GfnSVZBwO4MMd85taHNvitLnkEREjANSoPUuAJF3SKRHE9K8jUAzhyXflvgNNoM -tyczoQ5/L5BNiyA2h+1TU8jWicNDtl1+CtOsgEVBBHA6p/IHhsHbNZWPrYtIO9mh -hiJw1R5yrITXnjZY0rObITwyt/e6Sc3YnoQfsSGaLJEG0aDc0RALAhgzj+RY8086 -2RKOyfdw1sw1RmJKdCf+dOzhPyDpvauvCxrL8UZQTzcBs+qpxOWnZFRWeNsLwoDn -6JXXAgMBAAGjggEmMIIBIjAJBgNVHRMEAjAAMBEGCWCGSAGG+EIBAQQEAwIGQDAz -BglghkgBhvhCAQ0EJhYkT3BlblNTTCBHZW5lcmF0ZWQgU2VydmVyIENlcnRpZmlj -YXRlMB0GA1UdDgQWBBRvvz0yGw6Tz2UxbBLAGyzVMtcMUDCBiAYDVR0jBIGAMH6A -FKFadJx46upif1BrhYZ0iu8o2z8roVCkTjBMMQswCQYDVQQGEwJQVDELMAkGA1UE -CAwCTkExDzANBgNVBAcMBkxpc2JvbjEOMAwGA1UECgwFTXlMYWIxDzANBgNVBAMM -BlJvb3RDQYIUU+VHJ91JsLLA1GJYC+UchNfw3hEwDgYDVR0PAQH/BAQDAgWgMBMG -A1UdJQQMMAoGCCsGAQUFBwMBMA0GCSqGSIb3DQEBCwUAA4ICAQCaABHQxm6mtrM9 -f7kbgzuhEc47Q+bgrbjxeoIVOeO2Zshdw0SZlfkWvWe0622WSeWMsTBJ3hoaQwZe -9FUf1lnsWe6u6oOckiG9OjE0TyXJ7+eghdL1HPeXgJ+4ihwJsRtkNEljWf4HS7/n -y5LaFhcXdn2ZdbUKJ7z7zXqzh2Cp8VUBtsR+/IdiLjSN81dQou77/a2M/a/7BI2Z -HhUlUx1T7jHzNllJBRF3IaOk72yjoU4cL0qVy9874SXPwdpeFHtvS4TdQTLqnAGR -liHJcB1ZNz1sVOXndw3Wbvv6iB5y+IX/Y/kRSHS6zpZGdAb7ar/Vgl+Uvs3fKi44 -y9hq2b49bYlcSQMtmlimCBDiu82z0aYtVFLalZ2L/W7CMaeE3jpyzu/bbygRv/Bp -lKSaUtaFIVgiuRBPwIBDMyai3CJ5L+dJrJPU2JzzQvtJGFQCFCIHd9rqweubZB6V -re5cUn4dxlxA5SkZ0amFFV5DpP0YhThA/gq0t/NeWRmCEEBWNXZaqFmDhiYS5mnu -Z+NUtv8E332S46RdfneHe961SlMXEFC96I+1HOjXHdXlqKfOU8Qvy8VzsnpjuNE5 -VTrvnAM1L3LwqtYQYfUWUHYZFYdvh8layA2ImNE7yx/9wIIkw/L1j9m71Upi6WKR -FKbYFqzgpWksa+zZ2RYYplUAxq0wYw== ------END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/test.key b/spec/fixtures/test_certs/test.key deleted file mode 100644 index 769761c7..00000000 --- a/spec/fixtures/test_certs/test.key +++ /dev/null @@ -1,51 +0,0 @@ ------BEGIN RSA PRIVATE KEY----- -MIIJKQIBAAKCAgEAxiE/bM84TeR72dp4r5wg81X3WG26gYYBGz6aVj8hXFdK9q7Z -ln3iuRBHbE0vxMuKME3HjqZT6QKeLLxfmv4tf2x4ap0GGFOH+0InOHwwLMy3YkiK -NKNXLDbyKIJvz/rr0sxOnmUWVEpqOPXWOUikibej44epTNjBxI7DyiCnQDee8BDf -dKuCGb+gBxPa6YasXJhdZPo+cZoQlEtRCfsu4cmdEux4IXYKf9HMUrv+2JVWMiaZ -CJUHd0T18xH+lUmI7a/QI6da4b79utdatAgeTQC4PykdzojD2Jk4ORwCjncHjWpu -+pxt1cU3ld8vi1v6WQG4uKgJfuP3NHjhrC+6P2Ep+6oU4VNQFkvZGNVsUwqykYef -K1BkX5r6Zzkffcb0eI8/eNkTYmOtaU5rL6PoibKUyCNxmYcCBpR7cRPUZ+dJVkHA -7gwx3zm1oc2+K0ueQRESMA1Kg9S4AkXdIpEcT0ryNQDOHJd+W+A02gy3JzOhDn8v -kE2LIDaH7VNTyNaJw0O2XX4K06yARUEEcDqn8geGwds1lY+ti0g72aGGInDVHnKs -hNeeNljSs5shPDK397pJzdiehB+xIZoskQbRoNzREAsCGDOP5FjzTzrZEo7J93DW -zDVGYkp0J/507OE/IOm9q68LGsvxRlBPNwGz6qnE5adkVFZ42wvCgOfoldcCAwEA -AQKCAgA1FkOATCWx+T6WKMudgh/yE16q+vu2KMmzGxsPcOrnaxxS7JawlBpjq9D3 -W9coy8DDIJQPzNE+5cyr/+0+Akz+j3nUVy6C5h7RW/BWWjAuUMvyMa2WXQ3GcxJ/ -eDOtbnYxjTyjhEJvY2EC0hwMTUKJBAONu5PJW2rP19DuH8Gwmzai7GJzSGEbtRST -0OYfHE6ioNCldce1eKpokaWtHvh41ySXJXUqwg4eIYC1ylmGfr0RwvXOLuBJPNkJ -wBCOv51I0oragsT/J8Wkgn9zLZmw2DiF8+ZgqJSRPLyr0K1+rrX/Vj1WOQPU+3rh -VWPP211A7A0qrRuePEbIcHtHP6KPUCepABL44K33zyyOydmnJ7vg3dsW7AN7+Y6O -H4B24d1ogn4TJwzpZCfRvqJJVu2wsnzleng9PcpXyHhldB6S9h2fPpNqDUBvfxMv -w/fGZ2ZpOeUKRfQ7VVR3XIWwFq/eDhzLicHipaoM+6gKeOZdJPAc0Ew5jvOXQSBD -CYCM12a8gnEYd55NLo/fF3wX6Wdq/X6EbWW97gwtmmEqnhcZMxLdeMuKyli22JyX -Ik7QIDsmPSWhCkI2JvQ+CAZZp6oMEKuSb7UqqfACQreIuxCUmPTZq/pAEUGSCZGP -wnWqOk5jwxJ4d5TQm7g2RgPC6lTd7as1m4+JB8H1cNVpS2d0AQKCAQEA5tL9WjIK -u6x1h4a4KPmOQ9B34GxmGM+P9/bQkkJsWYf5eG1MlYPAigAiN0PMYPHDweeROsOO -rvmJiWXtmPMVFFSalHVotN6LMj400HhHObg7dADDYTBBGX6QuljxBX9jgUiKSOKO -66ngXEyRdUmTAbral3UuSCFGcqG8Khd3taULO2q5JQLEioFT+Lr0UzHywVSJq06O -k37aC3zpiQj4S/bJG4GOadYDIENq+gRCIU7Hn4pS1qtxLYMyhtNDzK0pVriYNj9T 
-ydHUxSKZO6ogM4423wVKnKOa2Cj4rgKBDHGpJJ9R0ZhrTbAQOa8LgDy1P6aMlvH/ -t9bG/HClmuHrFwKCAQEA271gZpIeCFG/XYIbxO8Uh9wG2cHrt7idw0iFTW4PpOBk -CGExq7WlkR29BkzxTDEXz5bSgDa8Q83cDf9EjSHVJYVGSYnodz7ZV8jZd2PUdCV1 -dL1kHV7vqz/KKxuCp7icCwzG9rQ1CjsTv8gBM3cN6DrZgw/2F+HjQpCkeyxc6KBR -Q+167yaLvOv3W0BHdSywtiNDU48oSSBpEK2anh7ShjG3BaEr/gAqGsTvxjsl7zDg -6MZFegeYPFIEH4ftvLZugPgd3NBg00CfsNRkjVWsH51y4gBy2ZL8d31Q2D2FI94s -he57Trvs8t8Y9QHGTMCuUk9IwRBpgapmW+c6G65jQQKCAQB0IPquAjc8LIwE31aP -5t4YaC2nj2HeYa8BBf/xVdHFSY/Ncs/w+RRJWb/dJhMRQxFF4QdEFVcX2RvFMK7V -IJceX2JWBqvjM4O8h2dy6tCKzZG7zxZ9MxXNicuutUith2W8iY5pFPLqxdDHHw6f -f6CiYivPv3jFeVhEG/LbCmuDy8FW5162rCnNtTtWDFkX8i077xhEQ4Wf11ZEKVgl -RYoGTeboG8pWnQF9ne2YU8Qhlc0BC0qaDi8mwrcM9gVKWGRP6RdLU5kIFLWDaODH -D9Sbm5UnpzXekME6t4JFqaTbaeO7NRyo4pI5x7aiDtsubVyS5WweFSqeh0QdhV8M -CVWJAoIBAQCJ7OSFfVG8hxNG7lPf2PFaFZF3PXFncEoWOX7yixLmurIPUHKNBQdX -fwMW4VTULjxz4IdgUvr41E47uu6cZ5ASbQUhQ57WumxR3ZAikFbaUGjZTcd5aA2n -v/J1/F6WSBoqFWsNjy97rHlI95FJbIEeM1I0IeCmPPMY5RFY/w+SNI7NxFJVqiwr -+TDZ5g70TYjdymSIHmN7AwzvmwhiF5atBKeMsQ2b8R58jwCxvI6jBFsnwMv7PKkh -s5lC8V8YBKp36UVVRLaB4x5ZL/etfwj7Dyj9EqsERm6R0ebc1ECtstbfekGLugmQ -qNhRcTu3EXpZz8oq5NJUwVBef1TJ6zwBAoIBAQC7Oq5AdmLzYOX0AYUuT3Cjgl0u -3Tq1c1uqlVaxQGjA3oqua4SR0+kvmRZbZGLpLAVnnwOjUEfUFBBYgP/5Mo/OiCkQ -C8eWkSQKXy6OFy5wh4mbL5oJttKjM4ZoB0gpF31+tGOmrfJwacqEPnyZLKzkgBdG -djVVKP+HH4XUdB2VXst8tvcif+VTrUsD1nuhGMOgbiHZkdx3eug05wfhnYvWljA+ -r/4xcq7TmKSoJqkb0OcOkhqjeZWleA6xRtEvHPGRzbEM67FVsVTxr/N9BX5tS9zu -YLCNI3tTNsDV0Ac+4rl46rghQ/n2TNSEpwvA/pjytsdPXLOpoapWirrsEiXf ------END RSA PRIVATE KEY----- diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 492d9b47..606eb8b3 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -600,7 +600,7 @@ def synchronize_method!(object, method_name) it "should set authorization" do plugin.register client = plugin.send(:client) - auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers][:Authorization] + auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers]['Authorization'] expect( auth_header ).to eql "Basic #{Base64.encode64('elastic:my-passwd-00').rstrip}" end @@ -637,7 +637,7 @@ def synchronize_method!(object, method_name) it "should set authorization" do plugin.register client = plugin.send(:client) - auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers][:Authorization] + auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers]['Authorization'] expect( auth_header ).to eql "ApiKey #{Base64.strict_encode64('foo:bar')}" end diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index 059cf7de..fc35dd35 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -11,7 +11,7 @@ 'query' => '{ "query": { "match": { "message": "Not found"} }}' } } let(:plugin) { described_class.new(config) } let(:event) { LogStash::Event.new({}) } - let(:client_options) {{}} + let(:client_options) { Hash.new } before(:each) do @es = ESHelper.get_client(client_options) @@ -47,17 +47,29 @@ end describe 'against a secured elasticsearch', :secure_integration => true do - let(:user) { 'simpleuser' } - let(:password) { 'abc123' } - let(:ca_file) { "spec/fixtures/test_certs/test.crt" } - let(:client_options) {{:ca_file => ca_file, :user => user, :password => password}} - let(:config) { super().merge({ - 'user' => user, - 'password' => 
password, - 'ssl' => true, - 'ca_file' => ca_file }) - } + let(:user) { ENV['ELASTIC_USER'] || 'simpleuser' } + let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' } + let(:ca_file) { "spec/fixtures/test_certs/ca.crt" } + + let(:client_options) { { :ca_file => ca_file, :user => user, :password => password } } + + let(:config) { super().merge('user' => user, 'password' => password, 'ssl' => true, 'ca_file' => ca_file) } + it_behaves_like 'an elasticsearch index plugin' + + context "incorrect auth credentials" do + + let(:config) do + super().merge('user' => 'archer', 'password' => 'b0gus!') + end + + let(:queue) { [] } + + it "fails to run the plugin" do + plugin.register + expect { plugin.run queue }.to raise_error Elasticsearch::Transport::Transport::Errors::Unauthorized + end + end + end end - From 7b860c3f702959032c76a64a51294544feedffb6 Mon Sep 17 00:00:00 2001 From: kaisecheng <69120390+kaisecheng@users.noreply.github.com> Date: Fri, 20 Aug 2021 15:57:26 +0200 Subject: [PATCH 131/207] Fixed SSL handshake hang indefinitely with proxy setup (#156) update manticore to 0.7.1 --- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68dbc30f..f771d85e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.9.3 + - Fixed SSL handshake hang indefinitely with proxy setup [#156](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/156) + ## 4.9.2 - Fix: a regression (in LS 7.14.0) where due the elasticsearch client update (from 5.0.5 to 7.5.0) the `Authorization` header isn't passed, this leads to the plugin not being able to leverage `user`/`password` credentials set by the user. diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 9642c6f1..2803d4e0 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.9.2' + s.version = '4.9.3' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -30,7 +30,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' s.add_runtime_dependency 'rufus-scheduler' - s.add_runtime_dependency 'manticore', "~> 0.6" + s.add_runtime_dependency 'manticore', ">= 0.7.1" s.add_development_dependency 'faraday', "~> 0.15.4" s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'timecop' From 8aeb1fd564047baf49a4811d24333abee35b0b0e Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Mon, 23 Aug 2021 11:15:04 +0200 Subject: [PATCH 132/207] Feat: ecs_compatibility + event_factory support (#149) * Refactor: remove unused local variable * Refactor: remove (unused) codec default * Refactor: review/unify error logging format + reviewed event logging to include meta-data details * Deps: move (unused) plain codec to dev dependency * Deps: drop redundant sequel dependency --- CHANGELOG.md | 4 + docs/index.asciidoc | 47 ++++- lib/logstash/inputs/elasticsearch.rb | 69 ++++--- logstash-input-elasticsearch.gemspec | 10 +- spec/inputs/elasticsearch_spec.rb | 261 +++++++++++++++------------ 5 files changed, 229 insertions(+), 162 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f771d85e..51e23421 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.10.0 + - Feat: added ecs_compatibility + event_factory support [#149](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/149) + ## 4.9.3 - Fixed SSL handshake hang indefinitely with proxy setup [#156](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/156) @@ -6,6 +9,7 @@ header isn't passed, this leads to the plugin not being able to leverage `user`/`password` credentials set by the user. [#153](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/153) + ## 4.9.1 - [DOC] Replaced hard-coded links with shared attributes [#143](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/143) - [DOC] Added missing quote to docinfo_fields example [#145](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/145) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 42ea523f..d28f4fcb 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -83,8 +83,18 @@ Authentication to a secure Elasticsearch cluster is possible using _one_ of the Authorization to a secure Elasticsearch cluster requires `read` permission at index level and `monitoring` permissions at cluster level. The `monitoring` permission at cluster level is necessary to perform periodic connectivity checks. +[id="plugins-{type}s-{plugin}-ecs"] +==== Compatibility with the Elastic Common Schema (ECS) + +When ECS compatibility is disabled, `docinfo_target` uses the `"@metadata"` field as a default, with ECS enabled the plugin +uses a naming convention `"[@metadata][input][elasticsearch]"` as a default target for placing document information. + +The plugin logs a warning when ECS is enabled and `target` isn't set. + +TIP: Set the `target` option to avoid potential schema conflicts. + [id="plugins-{type}s-{plugin}-options"] -==== Elasticsearch Input Configuration Options +==== Elasticsearch Input configuration options This plugin supports the following configuration options plus the <> described later. 
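As a minimal sketch of what the mode-dependent default described above means at the
event level (field paths as exercised by the plugin specs later in this series; the
index value is illustrative):

    # ECS compatibility disabled: document info lands directly under @metadata
    event.get('[@metadata][_index]')                        #=> "logstash-2014.10.12"
    # ECS v1/v8: document info is nested under the plugin-specific target
    event.get('[@metadata][input][elasticsearch][_index]')  #=> "logstash-2014.10.12"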
@@ -99,6 +109,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -197,13 +208,14 @@ Example size => 500 scroll => "5m" docinfo => true + docinfo_target => "[@metadata][doc]" } } output { elasticsearch { - index => "copy-of-production.%{[@metadata][_index]}" - document_type => "%{[@metadata][_type]}" - document_id => "%{[@metadata][_id]}" + index => "copy-of-production.%{[@metadata][doc][_index]}" + document_type => "%{[@metadata][doc][_type]}" + document_id => "%{[@metadata][doc][_id]}" } } @@ -214,8 +226,9 @@ Example input { elasticsearch { docinfo => true + docinfo_target => "[@metadata][doc]" add_field => { - identifier => "%{[@metadata][_index]}:%{[@metadata][_type]}:%{[@metadata][_id]}" + identifier => "%{[@metadata][doc][_index]}:%{[@metadata][doc][_type]}:%{[@metadata][doc][_id]}" } } } @@ -236,11 +249,25 @@ more information. ===== `docinfo_target` * Value type is <> - * Default value is `"@metadata"` + * Default value depends on whether <> is enabled: + ** ECS Compatibility disabled: `"@metadata"` + ** ECS Compatibility enabled: `"[@metadata][input][elasticsearch]"` + +If document metadata storage is requested by enabling the `docinfo` option, +this option names the field under which to store the metadata fields as subfields. + +[id="plugins-{type}s-{plugin}-ecs_compatibility"] +===== `ecs_compatibility` + + * Value type is <> + * Supported values are: + ** `disabled`: CSV data added at root level + ** `v1`,`v8`: Elastic Common Schema compliant behavior + * Default value depends on which version of Logstash is running: + ** When Logstash provides a `pipeline.ecs_compatibility` setting, its value is used as the default + ** Otherwise, the default value is `disabled` -If document metadata storage is requested by enabling the `docinfo` -option, this option names the field under which to store the metadata -fields as subfields. +Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)]. [id="plugins-{type}s-{plugin}-hosts"] ===== `hosts` @@ -402,4 +429,4 @@ empty string authentication will be disabled. [id="plugins-{type}s-{plugin}-common-options"] include::{include_path}/{type}.asciidoc[] -:default_codec!: +:no_codec!: diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index c88371ef..de37eaf8 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -4,6 +4,9 @@ require "logstash/json" require "logstash/util/safe_uri" require 'logstash/plugin_mixins/validator_support/field_reference_validation_adapter' +require 'logstash/plugin_mixins/event_support/event_factory_adapter' +require 'logstash/plugin_mixins/ecs_compatibility_support' +require 'logstash/plugin_mixins/ecs_compatibility_support/target_check' require "base64" require "elasticsearch" @@ -66,12 +69,16 @@ # # class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base + + include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1) + include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck + + include LogStash::PluginMixins::EventSupport::EventFactoryAdapter + extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter config_name "elasticsearch" - default :codec, "json" - # List of elasticsearch hosts to use for querying. # Each host can be either IP, HOST, IP:port or HOST:port. 
# Port defaults to 9200 @@ -128,8 +135,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # config :docinfo, :validate => :boolean, :default => false - # Where to move the Elasticsearch document information. By default we use the @metadata field. - config :docinfo_target, :validate=> :string, :default => LogStash::Event::METADATA + # Where to move the Elasticsearch document information. + # default: [@metadata][input][elasticsearch] in ECS mode, @metadata field otherwise + config :docinfo_target, :validate=> :field_reference # List of document metadata to move to the `docinfo_target` field. # To learn more about Elasticsearch metadata fields read @@ -184,6 +192,14 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # If set, the _source of each hit will be added nested under the target instead of at the top-level config :target, :validate => :field_reference + def initialize(params={}) + super(params) + + if docinfo_target.nil? + @docinfo_target = ecs_select[disabled: '@metadata', v1: '[@metadata][input][elasticsearch]'] + end + end + def register require "rufus/scheduler" @@ -297,47 +313,41 @@ def process_next_scroll(output_queue, scroll_id) [r['hits']['hits'].any?, r['_scroll_id']] rescue => e # this will typically be triggered by a scroll timeout - logger.error("Scroll request error, aborting scroll", error: e.inspect) + logger.error("Scroll request error, aborting scroll", message: e.message, exception: e.class) # return no hits and original scroll_id so we can try to clear it [false, scroll_id] end def push_hit(hit, output_queue) - if @target.nil? - event = LogStash::Event.new(hit['_source']) - else - event = LogStash::Event.new - event.set(@target, hit['_source']) - end - - if @docinfo - # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. - docinfo_target = event.get(@docinfo_target) || {} - - unless docinfo_target.is_a?(Hash) - @logger.error("Elasticsearch Input: Incompatible Event, incompatible type for the docinfo_target=#{@docinfo_target} field in the `_source` document, expected a hash got:", :docinfo_target_type => docinfo_target.class, :event => event) + event = targeted_event_factory.new_event hit['_source'] + set_docinfo_fields(hit, event) if @docinfo + decorate(event) + output_queue << event + end - # TODO: (colin) I am not sure raising is a good strategy here? - raise Exception.new("Elasticsearch input: incompatible event") - end + def set_docinfo_fields(hit, event) + # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. + docinfo_target = event.get(@docinfo_target) || {} - @docinfo_fields.each do |field| - docinfo_target[field] = hit[field] - end + unless docinfo_target.is_a?(Hash) + @logger.error("Incompatible Event, incompatible type for the docinfo_target=#{@docinfo_target} field in the `_source` document, expected a hash got:", :docinfo_target_type => docinfo_target.class, :event => event.to_hash_with_metadata) - event.set(@docinfo_target, docinfo_target) + # TODO: (colin) I am not sure raising is a good strategy here? 
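      # (illustrative note) this branch fires when the `_source` document already
      # carries a non-hash value at @docinfo_target, e.g. `docinfo_target =>
      # "metadata_with_string"` while the document has a string field of that
      # name, as covered by the specs further below.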
+ raise Exception.new("Elasticsearch input: incompatible event") end - decorate(event) + @docinfo_fields.each do |field| + docinfo_target[field] = hit[field] + end - output_queue << event + event.set(@docinfo_target, docinfo_target) end def clear_scroll(scroll_id) @client.clear_scroll(scroll_id: scroll_id) if scroll_id rescue => e # ignore & log any clear_scroll errors - logger.warn("Ignoring clear_scroll exception", message: e.message) + logger.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class) end def scroll_request scroll_id @@ -447,6 +457,9 @@ def parse_user_password_from_cloud_auth(cloud_auth) [ cloud_auth.username, cloud_auth.password ] end + # @private used by unit specs + attr_reader :client + module URIOrEmptyValidator ## # @override to provide :uri_or_empty validator diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 2803d4e0..c19b6fdd 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.9.3' + s.version = '4.10.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -20,17 +20,19 @@ Gem::Specification.new do |s| s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } # Gem dependencies - s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" + s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3' + s.add_runtime_dependency 'logstash-mixin-event_support', '~> 1.0' + s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' s.add_runtime_dependency 'elasticsearch', '>= 5.0.5' # LS >= 6.7 and < 7.14 all used version 5.0.5 - s.add_runtime_dependency 'logstash-codec-json' - s.add_runtime_dependency 'sequel' s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' s.add_runtime_dependency 'rufus-scheduler' s.add_runtime_dependency 'manticore', ">= 0.7.1" + + s.add_development_dependency 'logstash-codec-plain' s.add_development_dependency 'faraday', "~> 0.15.4" s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'timecop' diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 606eb8b3..72f70ea6 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -8,13 +8,11 @@ require "time" require "date" -class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch - attr_reader :client -end +require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper' -describe LogStash::Inputs::TestableElasticsearch do +describe LogStash::Inputs::Elasticsearch, :ecs_compatibility_support do - let(:plugin) { LogStash::Inputs::TestableElasticsearch.new(config) } + let(:plugin) { described_class.new(config) } let(:queue) { Queue.new } it_behaves_like "an interruptible input plugin" do @@ -40,7 +38,13 @@ class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch end end - context 'creating events from Elasticsearch' do + + ecs_compatibility_matrix(:disabled, :v1, :v8) do |ecs_select| + + before(:each) do + allow_any_instance_of(described_class).to 
receive(:ecs_compatibility).and_return(ecs_compatibility) + end + let(:config) do %q[ input { @@ -97,7 +101,6 @@ class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch end expect(event).to be_a(LogStash::Event) - puts event.to_hash_with_metadata expect(event.get("message")).to eql [ "ohayo" ] end @@ -120,10 +123,10 @@ class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch end expect(event).to be_a(LogStash::Event) - puts event.to_hash_with_metadata expect(event.get("[@metadata][_source][message]")).to eql [ "ohayo" ] end end + end # This spec is an adapter-spec, ensuring that we send the right sequence of messages to our Elasticsearch Client @@ -135,6 +138,7 @@ class LogStash::Inputs::TestableElasticsearch < LogStash::Inputs::Elasticsearch 'query' => "#{LogStash::Json.dump(query)}", 'slices' => slices, 'docinfo' => true, # include ids + 'docinfo_target' => '[@metadata]' } end let(:query) do @@ -405,127 +409,140 @@ def synchronize_method!(object, method_name) allow(client).to receive(:clear_scroll).and_return(nil) end - context 'when defining docinfo' do - let(:config_metadata) do - %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - } - } - ] - end + ecs_compatibility_matrix(:disabled, :v1, :v8) do |ecs_select| - it 'merges the values if the `docinfo_target` already exist in the `_source` document' do - config_metadata_with_hash = %Q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - docinfo_target => 'metadata_with_hash' + before(:each) do + allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility) + end + + context 'with docinfo enabled' do + let(:config_metadata) do + %q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + } } - } - ] - - event = input(config_metadata_with_hash) do |pipeline, queue| - queue.pop + ] end - expect(event.get("[metadata_with_hash][_index]")).to eq('logstash-2014.10.12') - expect(event.get("[metadata_with_hash][_type]")).to eq('logs') - expect(event.get("[metadata_with_hash][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') - expect(event.get("[metadata_with_hash][awesome]")).to eq("logstash") - end - - context 'if the `docinfo_target` exist but is not of type hash' do - let (:config) { { - "hosts" => ["localhost"], - "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', - "docinfo" => true, - "docinfo_target" => 'metadata_with_string' - } } - it 'thows an exception if the `docinfo_target` exist but is not of type hash' do - expect(client).not_to receive(:clear_scroll) - plugin.register - expect { plugin.run([]) }.to raise_error(Exception, /incompatible event/) + it "provides document info under metadata" do + event = input(config_metadata) do |pipeline, queue| + queue.pop + end + + if ecs_select.active_mode == :disabled + expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[@metadata][_type]")).to eq('logs') + expect(event.get("[@metadata][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + else + expect(event.get("[@metadata][input][elasticsearch][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[@metadata][input][elasticsearch][_type]")).to eq('logs') + 
expect(event.get("[@metadata][input][elasticsearch][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + end end - end - it "should move the document info to the @metadata field" do - event = input(config_metadata) do |pipeline, queue| - queue.pop + it 'merges values if the `docinfo_target` already exist in the `_source` document' do + config_metadata_with_hash = %Q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_target => 'metadata_with_hash' + } + } + ] + + event = input(config_metadata_with_hash) do |pipeline, queue| + queue.pop + end + + expect(event.get("[metadata_with_hash][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[metadata_with_hash][_type]")).to eq('logs') + expect(event.get("[metadata_with_hash][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event.get("[metadata_with_hash][awesome]")).to eq("logstash") end - expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') - expect(event.get("[@metadata][_type]")).to eq('logs') - expect(event.get("[@metadata][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') - end + context 'if the `docinfo_target` exist but is not of type hash' do + let (:config) { { + "hosts" => ["localhost"], + "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', + "docinfo" => true, + "docinfo_target" => 'metadata_with_string' + } } + it 'thows an exception if the `docinfo_target` exist but is not of type hash' do + expect(client).not_to receive(:clear_scroll) + plugin.register + expect { plugin.run([]) }.to raise_error(Exception, /incompatible event/) + end + end - it 'should move the document information to the specified field' do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - docinfo_target => 'meta' + it 'should move the document information to the specified field' do + config = %q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_target => 'meta' + } } - } - ] - event = input(config) do |pipeline, queue| - queue.pop + ] + event = input(config) do |pipeline, queue| + queue.pop + end + + expect(event.get("[meta][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[meta][_type]")).to eq('logs') + expect(event.get("[meta][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') end - expect(event.get("[meta][_index]")).to eq('logstash-2014.10.12') - expect(event.get("[meta][_type]")).to eq('logs') - expect(event.get("[meta][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') - end + it "allows to specify which fields from the document info to save to metadata" do + fields = ["_index"] + config = %Q[ + input { + elasticsearch { + hosts => ["localhost"] + query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + docinfo => true + docinfo_fields => #{fields} + } + }] + + event = input(config) do |pipeline, queue| + queue.pop + end + + meta_base = event.get(ecs_select.active_mode == :disabled ? 
"@metadata" : "[@metadata][input][elasticsearch]") + expect(meta_base.keys).to eq(fields) + end - it "should allow to specify which fields from the document info to save to the @metadata field" do - fields = ["_index"] - config = %Q[ + it 'should be able to reference metadata fields in `add_field` decorations' do + config = %q[ input { elasticsearch { hosts => ["localhost"] query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' docinfo => true - docinfo_fields => #{fields} - } - }] - - event = input(config) do |pipeline, queue| - queue.pop - end - - expect(event.get("@metadata").keys).to eq(fields) - expect(event.get("[@metadata][_type]")).to eq(nil) - expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') - expect(event.get("[@metadata][_id]")).to eq(nil) - end - - it 'should be able to reference metadata fields in `add_field` decorations' do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - add_field => { - 'identifier' => "foo:%{[@metadata][_type]}:%{[@metadata][_id]}" + add_field => { + 'identifier' => "foo:%{[@metadata][_type]}:%{[@metadata][_id]}" + } } } - } - ] + ] - event = input(config) do |pipeline, queue| - queue.pop - end + event = input(config) do |pipeline, queue| + queue.pop + end + + expect(event.get('identifier')).to eq('foo:logs:C5b2xLQwTZa76jBmHIbwHQ') + end if ecs_select.active_mode == :disabled - expect(event.get('identifier')).to eq('foo:logs:C5b2xLQwTZa76jBmHIbwHQ') end + end context "when not defining the docinfo" do @@ -542,9 +559,7 @@ def synchronize_method!(object, method_name) queue.pop end - expect(event.get("[@metadata][_index]")).to eq(nil) - expect(event.get("[@metadata][_type]")).to eq(nil) - expect(event.get("[@metadata][_id]")).to eq(nil) + expect(event.get("[@metadata]")).to be_empty end end end @@ -568,13 +583,13 @@ def synchronize_method!(object, method_name) it "should set host(s)" do plugin.register client = plugin.send(:client) - expect( client.transport.instance_variable_get(:@hosts) ).to eql [{ - :scheme => "https", - :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", - :port => 9243, - :path => "", - :protocol => "https" - }] + expect( extract_transport(client).hosts ).to eql [{ + :scheme => "https", + :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", + :port => 9243, + :path => "", + :protocol => "https" + }] end context 'invalid' do @@ -600,7 +615,7 @@ def synchronize_method!(object, method_name) it "should set authorization" do plugin.register client = plugin.send(:client) - auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers]['Authorization'] + auth_header = extract_transport(client).options[:transport_options][:headers]['Authorization'] expect( auth_header ).to eql "Basic #{Base64.encode64('elastic:my-passwd-00').rstrip}" end @@ -637,7 +652,7 @@ def synchronize_method!(object, method_name) it "should set authorization" do plugin.register client = plugin.send(:client) - auth_header = client.transport.instance_variable_get(:@options)[:transport_options][:headers]['Authorization'] + auth_header = extract_transport(client).options[:transport_options][:headers]['Authorization'] expect( auth_header ).to eql "ApiKey #{Base64.strict_encode64('foo:bar')}" end @@ -658,7 +673,7 @@ def synchronize_method!(object, method_name) it "should set proxy" do plugin.register client = plugin.send(:client) - 
proxy = client.transport.instance_variable_get(:@options)[:transport_options][:proxy]
+        proxy = extract_transport(client).options[:transport_options][:proxy]
         expect( proxy ).to eql "http://localhost:1234/"
       end

@@ -670,7 +685,7 @@ def synchronize_method!(object, method_name)
         plugin.register
         client = plugin.send(:client)

-        expect( client.transport.instance_variable_get(:@options)[:transport_options] ).to_not include(:proxy)
+        expect( extract_transport(client).options[:transport_options] ).to_not include(:proxy)
       end
     end
   end
@@ -756,4 +771,10 @@ def synchronize_method!(object, method_name)
     end

   end
+
+  # @note can be removed once we depends on elasticsearch gem >= 6.x
+  def extract_transport(client) # on 7.x client.transport is a ES::Transport::Client
+    client.transport.respond_to?(:transport) ? client.transport.transport : client.transport
+  end
+
 end

From dc5976b9dba2d671f0e9899cfbf88d63fd242c1b Mon Sep 17 00:00:00 2001
From: Andrea Selva
Date: Wed, 29 Sep 2021 14:14:00 +0200
Subject: [PATCH 133/207] Feat: Insert 'user-agent' header in requests done to
 the Elasticsearch (#158)

This commit adds the header `user-agent` in the form:

logstash/<logstash version> (OS=<name>-<version>-<arch>; JVM=<vendor>-<version>) logstash-<plugin type>-<plugin name>/<plugin version>

to each request passed to Elasticsearch, so that Logstash and the plugin
could be identified as agent.

---
 CHANGELOG.md                                  |   3 +
 lib/logstash/inputs/elasticsearch.rb          |  13 ++
 ..._elasticsearch_transport_http_manticore.rb |  12 +-
 logstash-input-elasticsearch.gemspec          |   4 +-
 spec/inputs/elasticsearch_spec.rb             | 153 ++++++++++++++++++
 5 files changed, 183 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51e23421..0e37e33e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.11.0
+  - Feat: add user-agent header passed to the Elasticsearch HTTP connection [#158](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/158)
+
 ## 4.10.0
   - Feat: added ecs_compatibility + event_factory support [#149](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/149)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index de37eaf8..091e8f77 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -222,6 +222,7 @@ def register
     transport_options = {:headers => {}}
     transport_options[:headers].merge!(setup_basic_auth(user, password))
     transport_options[:headers].merge!(setup_api_key(api_key))
+    transport_options[:headers].merge!({'user-agent' => prepare_user_agent()})
     transport_options[:request_timeout] = @request_timeout_seconds unless @request_timeout_seconds.nil?
     transport_options[:connect_timeout] = @connect_timeout_seconds unless @connect_timeout_seconds.nil?
     transport_options[:socket_timeout] = @socket_timeout_seconds unless @socket_timeout_seconds.nil?
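A minimal sketch of the header the merge above produces, reusing the example value
from the code comment below (actual values depend on the runtime environment):

    transport_options[:headers]['user-agent']
    #=> "logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0"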
@@ -407,6 +408,18 @@ def setup_api_key(api_key) { 'Authorization' => "ApiKey #{token}" } end + def prepare_user_agent + os_name = java.lang.System.getProperty('os.name') + os_version = java.lang.System.getProperty('os.version') + os_arch = java.lang.System.getProperty('os.arch') + jvm_vendor = java.lang.System.getProperty('java.vendor') + jvm_version = java.lang.System.getProperty('java.version') + + plugin_version = Gem.loaded_specs["logstash-input-elasticsearch"].version + # example: logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0 + "logstash/#{LOGSTASH_VERSION} (OS=#{os_name}-#{os_version}-#{os_arch}; JVM=#{jvm_vendor}-#{jvm_version}) logstash-#{@plugin_type}-#{config_name}/#{plugin_version}" + end + def fill_user_password_from_cloud_auth return unless @cloud_auth diff --git a/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb index c7c712e0..e39bd9c2 100644 --- a/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb +++ b/lib/logstash/inputs/elasticsearch/patches/_elasticsearch_transport_http_manticore.rb @@ -20,7 +20,17 @@ class Manticore def apply_headers(request_options, options) headers = (options && options[:headers]) || {} headers[CONTENT_TYPE_STR] = find_value(headers, CONTENT_TYPE_REGEX) || DEFAULT_CONTENT_TYPE - headers[USER_AGENT_STR] = find_value(headers, USER_AGENT_REGEX) || user_agent_header + + # this code is necessary to grab the correct user-agent header + # when this method is invoked with apply_headers(@request_options, options) + # from https://github.com/elastic/elasticsearch-ruby/blob/v7.14.0/elasticsearch-transport/lib/elasticsearch/transport/transport/http/manticore.rb#L113-L114 + transport_user_agent = nil + if (options && options[:transport_options] && options[:transport_options][:headers]) + transport_headers = options[:transport_options][:headers] + transport_user_agent = find_value(transport_headers, USER_AGENT_REGEX) + end + + headers[USER_AGENT_STR] = transport_user_agent || find_value(headers, USER_AGENT_REGEX) || user_agent_header headers[ACCEPT_ENCODING] = GZIP if use_compression? (request_options[:headers] ||= {}).merge!(headers) # this line was changed end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index c19b6fdd..263babda 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.10.0' + s.version = '4.11.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -36,4 +36,6 @@ Gem::Specification.new do |s| s.add_development_dependency 'faraday', "~> 0.15.4" s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'timecop' + s.add_development_dependency 'cabin', ['~> 0.6'] + s.add_development_dependency 'webrick' end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 72f70ea6..af48925c 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -7,6 +7,9 @@ require "stud/temporary" require "time" require "date" +require "cabin" +require "webrick" +require "uri" require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper' @@ -690,6 +693,156 @@ def synchronize_method!(object, method_name) end end + class StoppableServer + + attr_reader :port + + def initialize() + queue = Queue.new + @first_req_waiter = java.util.concurrent.CountDownLatch.new(1) + @first_request = nil + + @t = java.lang.Thread.new( + proc do + begin + @server = WEBrick::HTTPServer.new :Port => 0, :DocumentRoot => ".", + :Logger => Cabin::Channel.get, # silence WEBrick logging + :StartCallback => Proc.new { + queue.push("started") + } + @port = @server.config[:Port] + @server.mount_proc '/' do |req, res| + res.body = ''' + { + "name": "ce7ccfb438e8", + "cluster_name": "docker-cluster", + "cluster_uuid": "DyR1hN03QvuCWXRy3jtb0g", + "version": { + "number": "7.13.1", + "build_flavor": "default", + "build_type": "docker", + "build_hash": "9a7758028e4ea59bcab41c12004603c5a7dd84a9", + "build_date": "2021-05-28T17:40:59.346932922Z", + "build_snapshot": false, + "lucene_version": "8.8.2", + "minimum_wire_compatibility_version": "6.8.0", + "minimum_index_compatibility_version": "6.0.0-beta1" + }, + "tagline": "You Know, for Search" + } + ''' + res.status = 200 + res['Content-Type'] = 'application/json' + @first_request = req + @first_req_waiter.countDown() + end + + @server.mount_proc '/logstash_unit_test/_search' do |req, res| + res.body = ''' + { + "took" : 1, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 10000, + "relation" : "gte" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "test_bulk_index_2", + "_type" : "_doc", + "_id" : "sHe6A3wBesqF7ydicQvG", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2021-09-20T15:02:02.557Z", + "message" : "{\"name\": \"Andrea\"}", + "@version" : "1", + "host" : "kalispera", + "sequence" : 5 + } + } + ] + } + } + ''' + res.status = 200 + res['Content-Type'] = 'application/json' + @first_request = req + @first_req_waiter.countDown() + end + + + + @server.start + rescue => e + puts "Error in webserver thread #{e}" + # ignore + end + end + ) + @t.daemon = true + @t.start + queue.pop # blocks until the server is up + end + + def stop + @server.shutdown + end + + def wait_receive_request + @first_req_waiter.await(2, java.util.concurrent.TimeUnit::SECONDS) + @first_request + end + end + + describe "'user-agent' header" do + let!(:webserver) { StoppableServer.new } # webserver must be started before the call, so no lazy "let" + + after :each do + webserver.stop + end + + it "server should be started" do + require 'net/http' + response = nil + Net::HTTP.start('localhost', webserver.port) {|http| + response = http.request_get('/') + } + expect(response.code.to_i).to eq(200) + end + + context "used by plugin" do + let(:config) do + { + "hosts" => ["localhost:#{webserver.port}"], + "query" => '{ "query": { 
"match": { "statuscode": 200 } }, "sort": [ "_doc" ] }', + "index" => "logstash_unit_test" + } + end + let(:plugin) { described_class.new(config) } + let(:event) { LogStash::Event.new({}) } + + it "client should sent the expect user-agent" do + plugin.register + + queue = [] + plugin.run(queue) + + request = webserver.wait_receive_request + + expect(request.header['user-agent'].size).to eq(1) + expect(request.header['user-agent'][0]).to match(/logstash\/\d*\.\d*\.\d* \(OS=.*; JVM=.*\) logstash-input-elasticsearch\/\d*\.\d*\.\d*/) + end + end + end + shared_examples'configurable timeout' do |config_name, manticore_transport_option| let(:config_value) { fail NotImplementedError } let(:config) { super().merge(config_name => config_value) } From 629eea9db291d510f00054a9bf546275a3f9096d Mon Sep 17 00:00:00 2001 From: Andrea Selva Date: Mon, 11 Oct 2021 10:10:20 +0200 Subject: [PATCH 134/207] Verify connection to Elasticsearch (#157) Update Elasticsearch ruby client to version 7.14 and manage the not valid product error --- .travis.yml | 2 +- CHANGELOG.md | 3 ++ lib/logstash/inputs/elasticsearch.rb | 8 +++ logstash-input-elasticsearch.gemspec | 6 +-- spec/inputs/elasticsearch_spec.rb | 52 ++++++++++++++++--- spec/inputs/integration/elasticsearch_spec.rb | 12 +++-- 6 files changed, 68 insertions(+), 15 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1a29639f..ecfbd168 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,4 +15,4 @@ env: - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.14.0 -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.9.3 +- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.9.3 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e37e33e..3b88c8a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.12.0 + - Feat: Update Elasticsearch client to 7.14.0 [#157](https://github.com/logstash-plugins/logstash-filter-elasticsearch/pull/157) + ## 4.11.0 - Feat: add user-agent header passed to the Elasticsearch HTTP connection [#158](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/158) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 091e8f77..7e2fcd54 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -240,6 +240,8 @@ def register :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore, :ssl => ssl_options ) + test_connection! + @client end @@ -473,6 +475,12 @@ def parse_user_password_from_cloud_auth(cloud_auth) # @private used by unit specs attr_reader :client + def test_connection! 
+ @client.ping + rescue Elasticsearch::UnsupportedProductError + raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch" + end + module URIOrEmptyValidator ## # @override to provide :uri_or_empty validator diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 263babda..f6b26fe8 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.11.0' + s.version = '4.12.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -25,7 +25,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'logstash-mixin-event_support', '~> 1.0' s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' - s.add_runtime_dependency 'elasticsearch', '>= 5.0.5' # LS >= 6.7 and < 7.14 all used version 5.0.5 + s.add_runtime_dependency 'elasticsearch', '>= 7.14.0' # LS >= 6.7 and < 7.14 all used version 5.0.5 s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' @@ -33,7 +33,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'manticore', ">= 0.7.1" s.add_development_dependency 'logstash-codec-plain' - s.add_development_dependency 'faraday', "~> 0.15.4" + s.add_development_dependency 'faraday', "~> 1" s.add_development_dependency 'logstash-devutils' s.add_development_dependency 'timecop' s.add_development_dependency 'cabin', ['~> 0.6'] diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index af48925c..c301acd0 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -18,6 +18,34 @@ let(:plugin) { described_class.new(config) } let(:queue) { Queue.new } + before(:each) do + Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method + end + + context "register" do + let(:config) do + { + "schedule" => "* * * * * UTC" + } + end + + context "against authentic Elasticsearch" do + it "should not raise an exception" do + expect { plugin.register }.to_not raise_error + end + end + + context "against not authentic Elasticsearch" do + before(:each) do + Elasticsearch::Client.send(:define_method, :ping) { raise Elasticsearch::UnsupportedProductError.new("Fake error") } # define error ping method + end + + it "should raise ConfigurationError" do + expect { plugin.register }.to raise_error(LogStash::ConfigurationError) + end + end + end + it_behaves_like "an interruptible input plugin" do let(:esclient) { double("elasticsearch-client") } let(:config) do @@ -38,6 +66,7 @@ allow(esclient).to receive(:search) { { "hits" => { "hits" => [hit] } } } allow(esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } } allow(esclient).to receive(:clear_scroll).and_return(nil) + allow(esclient).to receive(:ping) end end @@ -96,6 +125,7 @@ expect(client).to receive(:search).with(any_args).and_return(mock_response) expect(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response) expect(client).to receive(:clear_scroll).and_return(nil) + expect(client).to receive(:ping) end it 'creates the events from the hits' do @@ -311,6 +341,7 @@ def synchronize_method!(object, 
method_name) expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) + allow(client).to receive(:ping) # SLICE1 is a two-page scroll in which the last page has no next scroll id slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) @@ -410,6 +441,7 @@ def synchronize_method!(object, method_name) expect(client).to receive(:search).with(any_args).and_return(response) allow(client).to receive(:scroll).with({ :body => {:scroll_id => "cXVlcnlUaGVuRmV0Y2g"}, :scroll => "1m" }).and_return(scroll_reponse) allow(client).to receive(:clear_scroll).and_return(nil) + allow(client).to receive(:ping).and_return(nil) end ecs_compatibility_matrix(:disabled, :v1, :v8) do |ecs_select| @@ -586,13 +618,14 @@ def synchronize_method!(object, method_name) it "should set host(s)" do plugin.register client = plugin.send(:client) - expect( extract_transport(client).hosts ).to eql [{ - :scheme => "https", - :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", - :port => 9243, - :path => "", - :protocol => "https" - }] + + expect( client.transport.instance_variable_get(:@seeds) ).to eql [{ + :scheme => "https", + :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", + :port => 9243, + :path => "", + :protocol => "https" + }] end context 'invalid' do @@ -843,7 +876,7 @@ def wait_receive_request end end - shared_examples'configurable timeout' do |config_name, manticore_transport_option| + shared_examples 'configurable timeout' do |config_name, manticore_transport_option| let(:config_value) { fail NotImplementedError } let(:config) { super().merge(config_name => config_value) } { @@ -874,6 +907,9 @@ def wait_receive_request transport_options = new_elasticsearch_client_params[:transport_options] expect(transport_options).to include(manticore_transport_option) expect(transport_options[manticore_transport_option]).to eq(config_value.to_i) + mock_client = double("fake_client") + allow(mock_client).to receive(:ping) + mock_client end plugin.register diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index fc35dd35..e7c11328 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -24,7 +24,6 @@ ESHelper.index_doc(@es, :index => 'logs', :body => { :response => 404, :message=> 'Not Found'}) end @es.indices.refresh - plugin.register end after(:each) do @@ -33,6 +32,10 @@ end shared_examples 'an elasticsearch index plugin' do + before(:each) do + plugin.register + end + it 'should retrieve json event from elasticsearch' do queue = [] plugin.run(queue) @@ -43,6 +46,10 @@ end describe 'against an unsecured elasticsearch', :integration => true do + before(:each) do + plugin.register + end + it_behaves_like 'an elasticsearch index plugin' end @@ -66,8 +73,7 @@ let(:queue) { [] } it "fails to run the plugin" do - plugin.register - expect { plugin.run queue }.to raise_error Elasticsearch::Transport::Transport::Errors::Unauthorized + expect { plugin.register }.to raise_error Elasticsearch::Transport::Transport::Errors::Unauthorized end end From 6e990bee2eb45ed192955da33cfa4536660dccfb Mon Sep 17 00:00:00 2001 From: Karen Metts 
<35154725+karenzone@users.noreply.github.com> Date: Tue, 12 Oct 2021 18:45:59 -0400 Subject: [PATCH 135/207] Fix PR link in changelog entry (#160) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b88c8a8..ad96499a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ ## 4.12.0 - - Feat: Update Elasticsearch client to 7.14.0 [#157](https://github.com/logstash-plugins/logstash-filter-elasticsearch/pull/157) + - Feat: Update Elasticsearch client to 7.14.0 [#157](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/157) ## 4.11.0 - Feat: add user-agent header passed to the Elasticsearch HTTP connection [#158](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/158) From 8c300d8c045c5dedcf7f5f57e0ceccc55ae42e9a Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Mon, 11 Oct 2021 10:51:08 -0400 Subject: [PATCH 136/207] fix: pass scroll_id in the body This should resolve the `too_long_frame_exception` issue described in #141. --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 7e2fcd54..851ebd74 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -347,7 +347,7 @@ def set_docinfo_fields(hit, event) end def clear_scroll(scroll_id) - @client.clear_scroll(scroll_id: scroll_id) if scroll_id + @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id rescue => e # ignore & log any clear_scroll errors logger.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class) From 13b62c34af4d409a8b8ec0ba29a1b5f4de7e7f65 Mon Sep 17 00:00:00 2001 From: Joao Duarte Date: Fri, 15 Oct 2021 17:57:01 +0100 Subject: [PATCH 137/207] prepare release 4.12.1 --- .travis.yml | 5 +---- CHANGELOG.md | 3 +++ logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index ecfbd168..a2283acc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,14 +5,11 @@ before_install: - sudo sysctl -w vm.max_map_count=262144 # due ES bootstrap requirements env: -- INTEGRATION=false ELASTIC_STACK_VERSION=6.x - INTEGRATION=false ELASTIC_STACK_VERSION=7.x - INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - INTEGRATION=false ELASTIC_STACK_VERSION=8.x SNAPSHOT=true -- INTEGRATION=true ELASTIC_STACK_VERSION=6.x - INTEGRATION=true ELASTIC_STACK_VERSION=7.x - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.14.0 -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.9.3 \ No newline at end of file +- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.15.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index ad96499a..693ae499 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.12.1 + - Fixed too_long_frame_exception by passing scroll_id in the body [#159](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/159) + ## 4.12.0 - Feat: Update Elasticsearch client to 7.14.0 [#157](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/157) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index f6b26fe8..5570314d 
100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.12.0' + s.version = '4.12.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From 8385b2ebbb6de436e048ce6e35df9575f4c0b63b Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Mon, 21 Feb 2022 15:42:34 +0100 Subject: [PATCH 138/207] Fix: hosts => "es_host:port" regression (#168) --- .ci/logstash-run.sh | 4 +- .travis.yml | 10 +++-- CHANGELOG.md | 3 ++ Gemfile | 3 ++ lib/logstash/inputs/elasticsearch.rb | 12 +++--- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/integration/elasticsearch_spec.rb | 39 ++++++++++++++++--- 7 files changed, 54 insertions(+), 19 deletions(-) diff --git a/.ci/logstash-run.sh b/.ci/logstash-run.sh index d0fb854b..e7e81e1d 100755 --- a/.ci/logstash-run.sh +++ b/.ci/logstash-run.sh @@ -21,7 +21,7 @@ wait_for_es() { } if [[ "$INTEGRATION" != "true" ]]; then - bundle exec rspec -fd spec/inputs -t ~integration -t ~secure_integration + jruby -rbundler/setup -S rspec -fd --tag ~integration --tag ~secure_integration spec/inputs else if [[ "$SECURE_INTEGRATION" == "true" ]]; then extra_tag_args="--tag secure_integration" @@ -29,5 +29,5 @@ else extra_tag_args="--tag ~secure_integration --tag integration" fi wait_for_es - bundle exec rspec -fd $extra_tag_args --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration + jruby -rbundler/setup -S rspec -fd $extra_tag_args --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration fi diff --git a/.travis.yml b/.travis.yml index a2283acc..998e838f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,10 +6,12 @@ before_install: env: - INTEGRATION=false ELASTIC_STACK_VERSION=7.x +- INTEGRATION=false ELASTIC_STACK_VERSION=8.x - INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - INTEGRATION=false ELASTIC_STACK_VERSION=8.x SNAPSHOT=true +- INTEGRATION=false ELASTIC_STACK_VERSION=7.16.3 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.15.0 - INTEGRATION=true ELASTIC_STACK_VERSION=7.x -- INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true -- INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.15.0 +- INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true LOG_LEVEL=info +- INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true LOG_LEVEL=info +- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x LOG_LEVEL=info +- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.15.0 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.14.1 LOG_LEVEL=info diff --git a/CHANGELOG.md b/CHANGELOG.md index 693ae499..2251e18e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.12.2 + - Fix: hosts => "es_host:port" regression [#168](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/168) + ## 4.12.1 - Fixed too_long_frame_exception by passing scroll_id in the body [#159](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/159) diff --git a/Gemfile b/Gemfile index 32cc6fbb..85bf0375 100644 --- a/Gemfile +++ b/Gemfile @@ -9,3 +9,6 @@ if Dir.exist?(logstash_path) && 
use_logstash_source gem 'logstash-core', :path => "#{logstash_path}/logstash-core" gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api" end + +gem 'manticore', ENV['MANTICORE_VERSION'] if ENV['MANTICORE_VERSION'] +gem 'elasticsearch', ENV['ELASTICSEARCH_VERSION'] if ENV['ELASTICSEARCH_VERSION'] diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 851ebd74..1319d7f5 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -386,13 +386,13 @@ def setup_ssl def setup_hosts @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s - if @ssl - @hosts.map do |h| - host, port = h.split(":") - { :host => host, :scheme => 'https', :port => port } + @hosts.map do |h| + if h.start_with?('http:', 'https:') + h + else + host, port = h.split(':') + { host: host, port: port, scheme: (@ssl ? 'https' : 'http') } end - else - @hosts end end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 5570314d..2cd5bfcf 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.12.1' + s.version = '4.12.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index e7c11328..8edc5d89 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -6,13 +6,19 @@ describe LogStash::Inputs::Elasticsearch do - let(:config) { { 'hosts' => [ESHelper.get_host_port], + SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' + + let(:config) { { 'hosts' => ["http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}"], 'index' => 'logs', 'query' => '{ "query": { "match": { "message": "Not found"} }}' } } let(:plugin) { described_class.new(config) } let(:event) { LogStash::Event.new({}) } let(:client_options) { Hash.new } + let(:user) { ENV['ELASTIC_USER'] || 'simpleuser' } + let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' } + let(:ca_file) { "spec/fixtures/test_certs/ca.crt" } + before(:each) do @es = ESHelper.get_client(client_options) # Delete all templates first. 
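A minimal sketch of the host normalization restored by the `setup_hosts` change
above (host values are illustrative):

    # "scheme://host[:port]" strings pass through unchanged:
    #   "https://es_host:9200/"           #=> "https://es_host:9200/"
    # bare "host:port" strings are expanded, with the scheme following `ssl`:
    #   "es_host:9200", ssl => false    #=> { host: "es_host", port: "9200", scheme: "http" }
    #   "es_host:9200", ssl => true     #=> { host: "es_host", port: "9200", scheme: "https" }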
@@ -45,7 +51,7 @@ end end - describe 'against an unsecured elasticsearch', :integration => true do + describe 'against an unsecured elasticsearch', integration: true do before(:each) do plugin.register end @@ -53,10 +59,7 @@ it_behaves_like 'an elasticsearch index plugin' end - describe 'against a secured elasticsearch', :secure_integration => true do - let(:user) { ENV['ELASTIC_USER'] || 'simpleuser' } - let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' } - let(:ca_file) { "spec/fixtures/test_certs/ca.crt" } + describe 'against a secured elasticsearch', secure_integration: true do let(:client_options) { { :ca_file => ca_file, :user => user, :password => password } } @@ -78,4 +81,28 @@ end end + + context 'setting host:port', integration: true do + + let(:config) do + super().merge "hosts" => [ESHelper.get_host_port] + end + + it_behaves_like 'an elasticsearch index plugin' + + end + + context 'setting host:port (and ssl)', secure_integration: true do + + let(:client_options) { { :ca_file => ca_file, :user => user, :password => password } } + + let(:config) do + config = super().merge "hosts" => [ESHelper.get_host_port] + config.merge('user' => user, 'password' => password, 'ssl' => true, 'ca_file' => ca_file) + end + + it_behaves_like 'an elasticsearch index plugin' + + end + end From ff199c77f139a642de2838aeaf99c2ce6084d4ff Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Wed, 16 Mar 2022 12:15:10 +0100 Subject: [PATCH 139/207] Test: refactor potential stub leak from example (#169) --- spec/inputs/elasticsearch_spec.rb | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index c301acd0..74827eee 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -933,7 +933,7 @@ def wait_receive_request { "hosts" => ["localhost"], "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', - "schedule" => "* * * * * UTC" + "schedule" => "* * * * * * UTC" # every second } end @@ -942,21 +942,14 @@ def wait_receive_request end it "should properly schedule" do - Timecop.travel(Time.new(2000)) - Timecop.scale(60) - runner = Thread.new do - expect(plugin).to receive(:do_run) { - queue << LogStash::Event.new({}) - }.at_least(:twice) - - plugin.run(queue) - end - sleep 3 + expect(plugin).to receive(:do_run) { + queue << LogStash::Event.new({}) + }.at_least(:twice) + runner = Thread.start { plugin.run(queue) } + sleep 3.0 plugin.stop - runner.kill runner.join - expect(queue.size).to eq(2) - Timecop.return + expect(queue.size).to be >= 2 end end From cb9faacf0ff4bab3138febd2f45508e6b129aa60 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Wed, 16 Mar 2022 12:15:46 +0100 Subject: [PATCH 140/207] Test: against ES with configured TLSv1.3 (#162) Co-authored-by: kaisecheng <69120390+kaisecheng@users.noreply.github.com> --- .ci/Dockerfile.elasticsearch | 4 +++ .ci/docker-compose.override.yml | 4 ++- .ci/logstash-run.sh | 16 ++++++--- .travis.yml | 1 + logstash-input-elasticsearch.gemspec | 1 - spec/es_helper.rb | 33 ++++++++++--------- spec/inputs/integration/elasticsearch_spec.rb | 25 ++++++++++---- 7 files changed, 55 insertions(+), 29 deletions(-) diff --git a/.ci/Dockerfile.elasticsearch b/.ci/Dockerfile.elasticsearch index 2756c149..8ff2b75a 100644 --- a/.ci/Dockerfile.elasticsearch +++ b/.ci/Dockerfile.elasticsearch @@ -5,6 +5,7 @@ ARG plugin_path=/usr/share/plugins/plugin ARG es_path=/usr/share/elasticsearch ARG 
es_yml=$es_path/config/elasticsearch.yml
 ARG SECURE_INTEGRATION
+ARG ES_SSL_SUPPORTED_PROTOCOLS
 
 RUN rm -f $es_path/config/scripts
 
@@ -15,6 +16,9 @@ RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.k
 RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/es.crt" >> $es_yml; fi
 RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi
 RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.verification_mode: certificate" >> $es_yml; fi
+RUN if [ "$SECURE_INTEGRATION" = "true" ] && [ -n "$ES_SSL_SUPPORTED_PROTOCOLS" ] ; then echo "xpack.security.http.ssl.supported_protocols: ${ES_SSL_SUPPORTED_PROTOCOLS}" >> $es_yml; fi
+
+RUN cat $es_yml
 
 RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd simpleuser -p abc123 -r superuser; fi
 RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then $es_path/bin/elasticsearch-users useradd 'f@ncyuser' -p 'ab%12#' -r superuser; fi
diff --git a/.ci/docker-compose.override.yml b/.ci/docker-compose.override.yml
index 1d05943b..eb8377c7 100644
--- a/.ci/docker-compose.override.yml
+++ b/.ci/docker-compose.override.yml
@@ -5,9 +5,10 @@ services:
   logstash:
     command: /usr/share/plugins/plugin/.ci/logstash-run.sh
     environment:
+      - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION
       - INTEGRATION=${INTEGRATION:-false}
       - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false}
-      - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION
+      - ES_SSL_SUPPORTED_PROTOCOLS=$ES_SSL_SUPPORTED_PROTOCOLS
   elasticsearch:
     build:
       context: ../
@@ -16,6 +17,7 @@ services:
       - ELASTIC_STACK_VERSION=$ELASTIC_STACK_VERSION
       - INTEGRATION=${INTEGRATION:-false}
       - SECURE_INTEGRATION=${SECURE_INTEGRATION:-false}
+      - ES_SSL_SUPPORTED_PROTOCOLS=$ES_SSL_SUPPORTED_PROTOCOLS
     environment:
       #- ELASTIC_PASSWORD=$ELASTIC_PASSWORD a user is setup manually
       - xpack.security.enabled=${SECURE_INTEGRATION:-false}
diff --git a/.ci/logstash-run.sh b/.ci/logstash-run.sh
index e7e81e1d..a0553960 100755
--- a/.ci/logstash-run.sh
+++ b/.ci/logstash-run.sh
@@ -1,21 +1,26 @@
 #!/bin/bash
+
+env
+
 set -ex
 export PATH=$BUILD_DIR/gradle/bin:$PATH
 
+# CentOS 7 using curl defaults does not enable TLSv1.3
+CURL_OPTS="-k --tlsv1.2 --tls-max 1.3"
+
 wait_for_es() {
-  echo "Waiting for elasticsearch to respond..."
   es_url="http://elasticsearch:9200/"
   if [[ "$SECURE_INTEGRATION" == "true" ]]; then
-    es_url="https://elasticsearch:9200 -k"
+    es_url="https://elasticsearch:9200/"
   fi
   count=120
-  while ! curl --silent $es_url && [[ $count -ne 0 ]]; do
+  while ! curl $CURL_OPTS $es_url && [[ $count -ne 0 ]]; do
    count=$(( $count - 1 ))
    [[ $count -eq 0 ]] && return 1
    sleep 1
   done
-  echo "Elasticsearch is Up !"
+  echo $(curl $CURL_OPTS -v $es_url)
   return 0
 }
 
@@ -28,6 +33,9 @@ else
   else
     extra_tag_args="--tag ~secure_integration --tag integration"
   fi
+
+  echo "Waiting for elasticsearch to respond..."
   wait_for_es
+  echo "Elasticsearch is Up !"
  jruby -rbundler/setup -S rspec -fd $extra_tag_args --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration
 fi
diff --git a/.travis.yml b/.travis.yml
index 998e838f..ae3b17c1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,4 +14,5 @@ env:
 - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true LOG_LEVEL=info
 - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true LOG_LEVEL=info
 - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x LOG_LEVEL=info
+- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3
 - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.15.0 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.14.1 LOG_LEVEL=info
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 2cd5bfcf..bff1a0b2 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -33,7 +33,6 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'manticore', ">= 0.7.1"
 
   s.add_development_dependency 'logstash-codec-plain'
-  s.add_development_dependency 'faraday', "~> 1"
   s.add_development_dependency 'logstash-devutils'
   s.add_development_dependency 'timecop'
   s.add_development_dependency 'cabin', ['~> 0.6']
diff --git a/spec/es_helper.rb b/spec/es_helper.rb
index c3861211..cab670e6 100644
--- a/spec/es_helper.rb
+++ b/spec/es_helper.rb
@@ -7,25 +7,26 @@ def self.get_host_port
     end
   end
 
-  def self.get_client(options)
-    require 'elasticsearch/transport/transport/http/faraday' # supports user/password options
-    host, port = get_host_port.split(':')
-    host_opts = { host: host, port: port, scheme: 'http' }
-    ssl_opts = {}
-
-    if options[:ca_file]
-      ssl_opts = { ca_file: options[:ca_file], version: 'TLSv1.2', verify: false }
-      host_opts[:scheme] = 'https'
+  def self.curl_and_get_json_response(url, method: :get, args: nil); require 'open3'
+    cmd = "curl -s -v --show-error #{args} -X #{method.to_s.upcase} -k #{url}"
+    begin
+      out, err, status = Open3.capture3(cmd)
+    rescue Errno::ENOENT
+      fail "curl not available, make sure curl binary is installed and available on $PATH"
     end
 
-    if options[:user] && options[:password]
-      host_opts[:user] = options[:user]
-      host_opts[:password] = options[:password]
-    end
+    if status.success?
+      http_status = err[/< HTTP\/1\.1 (\d+)/, 1] || '0' # < HTTP/1.1 200 OK\r\n
+      if http_status.strip[0].to_i > 2
+        warn out
+        fail "#{cmd.inspect} unexpected response: #{http_status}\n\n#{err}"
+      end
 
-    Elasticsearch::Client.new(hosts: [host_opts],
-                              transport_options: { ssl: ssl_opts },
-                              transport_class: Elasticsearch::Transport::Transport::HTTP::Faraday)
+      LogStash::Json.load(out)
+    else
+      warn out
+      fail "#{cmd.inspect} process failed: #{status}\n\n#{err}"
+    end
   end
 
   def self.doc_type
diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb
index 8edc5d89..401706d6 100644
--- a/spec/inputs/integration/elasticsearch_spec.rb
+++ b/spec/inputs/integration/elasticsearch_spec.rb
@@ -11,6 +11,7 @@
   let(:config) { { 'hosts' => ["http#{SECURE_INTEGRATION ?
's' : nil}://#{ESHelper.get_host_port}"], 'index' => 'logs', 'query' => '{ "query": { "match": { "message": "Not found"} }}' } } + let(:plugin) { described_class.new(config) } let(:event) { LogStash::Event.new({}) } let(:client_options) { Hash.new } @@ -19,22 +20,32 @@ let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' } let(:ca_file) { "spec/fixtures/test_certs/ca.crt" } + let(:es_url) do + es_url = ESHelper.get_host_port + SECURE_INTEGRATION ? "https://#{es_url}" : "http://#{es_url}" + end + + let(:curl_args) do + config['user'] ? "-u #{config['user']}:#{config['password']}" : '' + end + before(:each) do - @es = ESHelper.get_client(client_options) # Delete all templates first. # Clean ES of data before we start. - @es.indices.delete_template(:name => "*") + ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args # This can fail if there are no indexes, ignore failure. - @es.indices.delete(:index => "*") rescue nil + ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil + doc_args = "#{curl_args} -H 'Content-Type: application/json' -d '{\"response\": 404, \"message\":\"Not Found\"}'" 10.times do - ESHelper.index_doc(@es, :index => 'logs', :body => { :response => 404, :message=> 'Not Found'}) + ESHelper.curl_and_get_json_response "#{es_url}/logs/_doc", method: 'POST', args: doc_args end - @es.indices.refresh + ESHelper.curl_and_get_json_response "#{es_url}/_refresh", method: 'POST', args: curl_args end after(:each) do - @es.indices.delete_template(:name => "*") - @es.indices.delete(:index => "*") rescue nil + ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args + # This can fail if there are no indexes, ignore failure. + ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil end shared_examples 'an elasticsearch index plugin' do From 6369311bb9e1b8306fbbd29ba1d725cf23babee6 Mon Sep 17 00:00:00 2001 From: Andrea Selva Date: Tue, 29 Mar 2022 10:00:04 +0200 Subject: [PATCH 141/207] Update ES ruby client version (#171) Request client version >= 7.17.1 to fix usage of custom user-agent header --- CHANGELOG.md | 11 +++++++---- logstash-input-elasticsearch.gemspec | 4 ++-- spec/inputs/elasticsearch_spec.rb | 12 ++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2251e18e..8a71f2ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.12.3 + - Fix: update Elasticsearch Ruby client to correctly customize 'user-agent' header[#171](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/171) + ## 4.12.2 - Fix: hosts => "es_host:port" regression [#168](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/168) @@ -17,7 +20,7 @@ - Fixed SSL handshake hang indefinitely with proxy setup [#156](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/156) ## 4.9.2 - - Fix: a regression (in LS 7.14.0) where due the elasticsearch client update (from 5.0.5 to 7.5.0) the `Authorization` + - Fix: a regression (in LS 7.14.0) where due the elasticsearch client update (from 5.0.5 to 7.5.0) the `Authorization` header isn't passed, this leads to the plugin not being able to leverage `user`/`password` credentials set by the user. 
[#153](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/153) @@ -61,7 +64,7 @@ - Feat: Added support for cloud_id / cloud_auth configuration [#112](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/112) ## 4.4.0 - - Changed Elasticsearch Client transport to use Manticore [#111](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/111) + - Changed Elasticsearch Client transport to use Manticore [#111](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/111) ## 4.3.3 - Loosen restrictions on Elasticsearch gem [#110](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/110) @@ -100,7 +103,7 @@ ## 4.0.2 - Bump ES client to 5.0.2 to get content-type: json behavior - - Revert unneeded manticore change + - Revert unneeded manticore change ## 4.0.1 - Switch internal HTTP client to support TLSv1.2 @@ -113,7 +116,7 @@ behavior - Improve documentation to show sort by \_doc, and how to add it to custom queries. - + ## 3.0.2 - Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99 diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index bff1a0b2..114da8d4 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.12.2' + s.version = '4.12.3' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -25,7 +25,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'logstash-mixin-event_support', '~> 1.0' s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' - s.add_runtime_dependency 'elasticsearch', '>= 7.14.0' # LS >= 6.7 and < 7.14 all used version 5.0.5 + s.add_runtime_dependency 'elasticsearch', '>= 7.17.1' s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 74827eee..7d022eeb 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -47,7 +47,6 @@ end it_behaves_like "an interruptible input plugin" do - let(:esclient) { double("elasticsearch-client") } let(:config) do { "schedule" => "* * * * * UTC" @@ -55,7 +54,8 @@ end before :each do - allow(Elasticsearch::Client).to receive(:new).and_return(esclient) + @esclient = double("elasticsearch-client") + allow(Elasticsearch::Client).to receive(:new).and_return(@esclient) hit = { "_index" => "logstash-2014.10.12", "_type" => "logs", @@ -63,10 +63,10 @@ "_score" => 1.0, "_source" => { "message" => ["ohayo"] } } - allow(esclient).to receive(:search) { { "hits" => { "hits" => [hit] } } } - allow(esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } } - allow(esclient).to receive(:clear_scroll).and_return(nil) - allow(esclient).to receive(:ping) + allow(@esclient).to receive(:search) { { "hits" => { "hits" => [hit] } } } + allow(@esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } } + allow(@esclient).to receive(:clear_scroll).and_return(nil) + allow(@esclient).to receive(:ping) end end From 33d6179c72b7e83f5bfa5e8ef031d560d999daf5 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Tue, 29 Mar 2022 17:26:24 +0200 Subject: [PATCH 142/207] Test: less double 
leaking failures (on CI) (#175) Co-authored-by: Andrea Selva --- spec/inputs/elasticsearch_spec.rb | 232 ++++++++++++------------------ 1 file changed, 89 insertions(+), 143 deletions(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 7d022eeb..e7f8948b 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -19,7 +19,14 @@ let(:queue) { Queue.new } before(:each) do - Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method + Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method + end + + let(:base_config) do + { + 'hosts' => ["localhost"], + 'query' => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } end context "register" do @@ -78,14 +85,10 @@ end let(:config) do - %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - } - } - ] + { + 'hosts' => ["localhost"], + 'query' => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' + } end let(:mock_response) do @@ -128,10 +131,11 @@ expect(client).to receive(:ping) end + before { plugin.register } + it 'creates the events from the hits' do - event = input(config) do |pipeline, queue| - queue.pop - end + plugin.run queue + event = queue.pop expect(event).to be_a(LogStash::Event) expect(event.get("message")).to eql [ "ohayo" ] @@ -139,21 +143,16 @@ context 'when a target is set' do let(:config) do - %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - target => "[@metadata][_source]" - } - } - ] + { + 'hosts' => ["localhost"], + 'query' => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', + 'target' => "[@metadata][_source]" + } end it 'creates the event using the target' do - event = input(config) do |pipeline, queue| - queue.pop - end + plugin.run queue + event = queue.pop expect(event).to be_a(LogStash::Event) expect(event.get("[@metadata][_source][message]")).to eql [ "ohayo" ] @@ -450,24 +449,21 @@ def synchronize_method!(object, method_name) allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility) end - context 'with docinfo enabled' do - let(:config_metadata) do - %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - } - } - ] + before do + if do_register + plugin.register + plugin.run queue end + end - it "provides document info under metadata" do - event = input(config_metadata) do |pipeline, queue| - queue.pop - end + let(:do_register) { true } + let(:event) { queue.pop } + + context 'with docinfo enabled' do + let(:config) { base_config.merge 'docinfo' => true } + + it "provides document info under metadata" do if ecs_select.active_mode == :disabled expect(event.get("[@metadata][_index]")).to eq('logstash-2014.10.12') expect(event.get("[@metadata][_type]")).to eq('logs') @@ -479,123 +475,72 @@ def synchronize_method!(object, method_name) end end - it 'merges values if the `docinfo_target` already exist in the `_source` document' do - config_metadata_with_hash = %Q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - docinfo_target => 'metadata_with_hash' - } - } - ] + 
context 'with docinfo_target' do + let(:config) { base_config.merge 'docinfo' => true, 'docinfo_target' => docinfo_target } + let(:docinfo_target) { 'metadata_with_hash' } + + it 'merges values if the `docinfo_target` already exist in the `_source` document' do + expect(event.get("[metadata_with_hash][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[metadata_with_hash][_type]")).to eq('logs') + expect(event.get("[metadata_with_hash][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + expect(event.get("[metadata_with_hash][awesome]")).to eq("logstash") + end + + context 'non-existent' do + let(:docinfo_target) { 'meta' } + + it 'should move the document information to the specified field' do + expect(event.get("[meta][_index]")).to eq('logstash-2014.10.12') + expect(event.get("[meta][_type]")).to eq('logs') + expect(event.get("[meta][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') + end - event = input(config_metadata_with_hash) do |pipeline, queue| - queue.pop end - expect(event.get("[metadata_with_hash][_index]")).to eq('logstash-2014.10.12') - expect(event.get("[metadata_with_hash][_type]")).to eq('logs') - expect(event.get("[metadata_with_hash][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') - expect(event.get("[metadata_with_hash][awesome]")).to eq("logstash") end context 'if the `docinfo_target` exist but is not of type hash' do - let (:config) { { - "hosts" => ["localhost"], - "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', - "docinfo" => true, - "docinfo_target" => 'metadata_with_string' - } } - it 'thows an exception if the `docinfo_target` exist but is not of type hash' do + let(:config) { base_config.merge 'docinfo' => true, "docinfo_target" => 'metadata_with_string' } + let(:do_register) { false } + + it 'raises an exception if the `docinfo_target` exist but is not of type hash' do expect(client).not_to receive(:clear_scroll) plugin.register expect { plugin.run([]) }.to raise_error(Exception, /incompatible event/) end - end - - it 'should move the document information to the specified field' do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - docinfo_target => 'meta' - } - } - ] - event = input(config) do |pipeline, queue| - queue.pop - end - expect(event.get("[meta][_index]")).to eq('logstash-2014.10.12') - expect(event.get("[meta][_type]")).to eq('logs') - expect(event.get("[meta][_id]")).to eq('C5b2xLQwTZa76jBmHIbwHQ') end - it "allows to specify which fields from the document info to save to metadata" do - fields = ["_index"] - config = %Q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - docinfo_fields => #{fields} - } - }] + context 'with docinfo_fields' do + let(:config) { base_config.merge 'docinfo' => true, "docinfo_fields" => ["_index"] } - event = input(config) do |pipeline, queue| - queue.pop + it "allows to specify which fields from the document info to save to metadata" do + meta_base = event.get(ecs_select.active_mode == :disabled ? "@metadata" : "[@metadata][input][elasticsearch]") + expect(meta_base.keys).to eql ["_index"] end - meta_base = event.get(ecs_select.active_mode == :disabled ? 
"@metadata" : "[@metadata][input][elasticsearch]") - expect(meta_base.keys).to eq(fields) end - it 'should be able to reference metadata fields in `add_field` decorations' do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - docinfo => true - add_field => { - 'identifier' => "foo:%{[@metadata][_type]}:%{[@metadata][_id]}" - } - } - } - ] + context 'add_field' do + let(:config) { base_config.merge 'docinfo' => true, + 'add_field' => { 'identifier' => "foo:%{[@metadata][_type]}:%{[@metadata][_id]}" } } - event = input(config) do |pipeline, queue| - queue.pop - end + it 'should be able to reference metadata fields in `add_field` decorations' do + expect(event.get('identifier')).to eq('foo:logs:C5b2xLQwTZa76jBmHIbwHQ') + end if ecs_select.active_mode == :disabled - expect(event.get('identifier')).to eq('foo:logs:C5b2xLQwTZa76jBmHIbwHQ') - end if ecs_select.active_mode == :disabled + end end - end + context "when not defining the docinfo" do + let(:config) { base_config } - context "when not defining the docinfo" do - it 'should keep the document information in the root of the event' do - config = %q[ - input { - elasticsearch { - hosts => ["localhost"] - query => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }' - } - } - ] - event = input(config) do |pipeline, queue| - queue.pop + it 'should keep the document information in the root of the event' do + expect(event.get("[@metadata]")).to be_empty end - - expect(event.get("[@metadata]")).to be_empty end + end end @@ -740,9 +685,7 @@ def initialize() begin @server = WEBrick::HTTPServer.new :Port => 0, :DocumentRoot => ".", :Logger => Cabin::Channel.get, # silence WEBrick logging - :StartCallback => Proc.new { - queue.push("started") - } + :StartCallback => Proc.new { queue.push("started") } @port = @server.config[:Port] @server.mount_proc '/' do |req, res| res.body = ''' @@ -811,11 +754,9 @@ def initialize() @first_req_waiter.countDown() end - - @server.start rescue => e - puts "Error in webserver thread #{e}" + warn "ERROR in webserver thread #{e.inspect}\n #{e.backtrace.join("\n ")}" # ignore end end @@ -914,6 +855,8 @@ def wait_receive_request plugin.register end + + after { plugin.do_stop } end end @@ -942,13 +885,16 @@ def wait_receive_request end it "should properly schedule" do - expect(plugin).to receive(:do_run) { - queue << LogStash::Event.new({}) - }.at_least(:twice) - runner = Thread.start { plugin.run(queue) } - sleep 3.0 - plugin.stop - runner.join + begin + expect(plugin).to receive(:do_run) { + queue << LogStash::Event.new({}) + }.at_least(:twice) + runner = Thread.start { plugin.run(queue) } + sleep 3.0 + ensure + plugin.do_stop + runner.join if runner + end expect(queue.size).to be >= 2 end From 3e339c5380d0863eadafa5e4ee7b1ca4728e26a7 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Thu, 31 Mar 2022 15:45:53 -0400 Subject: [PATCH 143/207] Fix spacing in changelog for consistency (#176) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a71f2ac..837275b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ ## 4.12.3 - - Fix: update Elasticsearch Ruby client to correctly customize 'user-agent' header[#171](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/171) + - Fix: update Elasticsearch Ruby client to correctly customize 'user-agent' header 
[#171](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/171) ## 4.12.2 - Fix: hosts => "es_host:port" regression [#168](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/168) From 4d6c35a80f8b35a69557d45e3ecf6e316b45c99b Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Tue, 24 May 2022 09:10:57 -0700 Subject: [PATCH 144/207] ssl: add support for `ca_trusted_fingerprint` on Logstash 8.3+ (#178) * ssl: add support for `ca_trusted_fingerprint` on Logstash 8.3+ * fixup: Array does not implement comparable * remove pre-release dependency, fixup changelog * dev-pin rufus to 3.0.9 * Revert "dev-pin rufus to 3.0.9" Equivalent change made on upstream logstash-core and is now in our 8.3 snapshot builds. This reverts commit 376724ac2c0c8f10243d3de3bd882811ad8013c1. * Revert "Revert "dev-pin rufus to 3.0.9"" Put back temporary dev dependency, since upstream packages to include it are still in-flight. This reverts commit cff7cd2cfe29c1ef1ea35bccd2c0a52940f8fe3a. --- CHANGELOG.md | 4 +- docs/index.asciidoc | 10 +++++ lib/logstash/inputs/elasticsearch.rb | 12 +++++- logstash-input-elasticsearch.gemspec | 6 ++- spec/fixtures/test_certs/ca.der.sha256 | 1 + spec/inputs/integration/elasticsearch_spec.rb | 38 ++++++++++++++----- 6 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 spec/fixtures/test_certs/ca.der.sha256 diff --git a/CHANGELOG.md b/CHANGELOG.md index 837275b7..f1bf557e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.13.0 + - Added support for `ca_trusted_fingerprint` when run on Logstash 8.3+ [#178](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/178) + ## 4.12.3 - Fix: update Elasticsearch Ruby client to correctly customize 'user-agent' header [#171](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/171) @@ -24,7 +27,6 @@ header isn't passed, this leads to the plugin not being able to leverage `user`/`password` credentials set by the user. [#153](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/153) - ## 4.9.1 - [DOC] Replaced hard-coded links with shared attributes [#143](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/143) - [DOC] Added missing quote to docinfo_fields example [#145](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/145) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index d28f4fcb..d5ea9582 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -103,6 +103,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |a valid filesystem path|No +| <> |<>|No | <> |<>|No | <> |<>|No | <> | <>|No @@ -152,6 +153,15 @@ API key API]. SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary. +[id="plugins-{type}s-{plugin}-ca_trusted_fingerprint"] +===== `ca_trusted_fingerprint` + +* Value type is <>, and must contain exactly 64 hexadecimal characters. +* There is no default value for this setting. +* Use of this option _requires_ Logstash 8.3+ + +The SHA-256 fingerprint of an SSL Certificate Authority to trust, such as the autogenerated self-signed CA for an Elasticsearch cluster. 
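+
+As a minimal sketch (not plugin code), the expected value can be computed from a
+PEM-encoded CA by hashing its DER form; the certificate path below is an assumption:
+
+[source,ruby]
+----
+require 'openssl'
+require 'digest'
+
+cert = OpenSSL::X509::Certificate.new(File.read('spec/fixtures/test_certs/ca.crt'))
+puts Digest::SHA256.hexdigest(cert.to_der) # => the 64-character fingerprint
+----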
+ [id="plugins-{type}s-{plugin}-cloud_auth"] ===== `cloud_auth` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 1319d7f5..196fab31 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -7,6 +7,7 @@ require 'logstash/plugin_mixins/event_support/event_factory_adapter' require 'logstash/plugin_mixins/ecs_compatibility_support' require 'logstash/plugin_mixins/ecs_compatibility_support/target_check' +require 'logstash/plugin_mixins/ca_trusted_fingerprint_support' require "base64" require "elasticsearch" @@ -192,6 +193,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # If set, the _source of each hit will be added nested under the target instead of at the top-level config :target, :validate => :field_reference + # config :ca_trusted_fingerprint, :validate => :sha_256_hex + include LogStash::PluginMixins::CATrustedFingerprintSupport + def initialize(params={}) super(params) @@ -381,7 +385,13 @@ def validate_authentication end def setup_ssl - @ssl && @ca_file ? { :ssl => true, :ca_file => @ca_file } : {} + ssl_options = {} + + ssl_options[:ssl] = true if @ssl + ssl_options[:ca_file] = @ca_file if @ssl && @ca_file + ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint + + ssl_options end def setup_hosts diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 114da8d4..d1e623a1 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.12.3' + s.version = '4.13.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -26,6 +26,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' s.add_runtime_dependency 'elasticsearch', '>= 7.17.1' + s.add_runtime_dependency 'logstash-mixin-ca_trusted_fingerprint_support', '~> 1.0' s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' @@ -37,4 +38,7 @@ Gem::Specification.new do |s| s.add_development_dependency 'timecop' s.add_development_dependency 'cabin', ['~> 0.6'] s.add_development_dependency 'webrick' + + # 3.8.0 has breaking changes WRT to joining, which break our specs + s.add_development_dependency 'rufus-scheduler', '~> 3.0.9' end diff --git a/spec/fixtures/test_certs/ca.der.sha256 b/spec/fixtures/test_certs/ca.der.sha256 new file mode 100644 index 00000000..2f04c231 --- /dev/null +++ b/spec/fixtures/test_certs/ca.der.sha256 @@ -0,0 +1 @@ +195a7e7b1bc29f3d7913a918a44721704d27fa56facea0cd72a8093c7107c283 diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index 401706d6..0b9aee02 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -72,25 +72,45 @@ describe 'against a secured elasticsearch', secure_integration: true do + # client_options is for an out-of-band helper let(:client_options) { { :ca_file => ca_file, :user => user, :password => password } } - let(:config) { super().merge('user' => user, 'password' => password, 'ssl' => true, 'ca_file' => ca_file) } + let(:config) { super().merge('user' => user, 'password' => password) } - it_behaves_like 'an elasticsearch index plugin' + shared_examples 'secured_elasticsearch' do + it_behaves_like 'an elasticsearch index plugin' - context "incorrect auth credentials" do + context "incorrect auth credentials" do - let(:config) do - super().merge('user' => 'archer', 'password' => 'b0gus!') - end + let(:config) do + super().merge('user' => 'archer', 'password' => 'b0gus!') + end - let(:queue) { [] } + let(:queue) { [] } - it "fails to run the plugin" do - expect { plugin.register }.to raise_error Elasticsearch::Transport::Transport::Errors::Unauthorized + it "fails to run the plugin" do + expect { plugin.register }.to raise_error Elasticsearch::Transport::Transport::Errors::Unauthorized + end end end + context 'with ca_file' do + let(:config) { super().merge('ssl' => true, 'ca_file' => ca_file) } + it_behaves_like 'secured_elasticsearch' + end + + context 'with `ca_trusted_fingerprint`' do + let(:ca_trusted_fingerprint) { File.read("spec/fixtures/test_certs/ca.der.sha256").chomp } + let(:config) { super().merge('ssl' => true, 'ca_trusted_fingerprint' => ca_trusted_fingerprint) } + + if Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create("8.3.0") + it_behaves_like 'secured_elasticsearch' + else + it 'raises a configuration error' do + expect { plugin }.to raise_exception(LogStash::ConfigurationError, a_string_including("ca_trusted_fingerprint")) + end + end + end end context 'setting host:port', integration: true do From 657c3ff1b3b6326106b82ce8ca8decb618fa59cd Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Wed, 8 Jun 2022 07:11:52 +0200 Subject: [PATCH 145/207] Refactor: switch to using scheduler mixin (#177) Co-authored-by: Ry Biesemeyer --- .travis.yml | 2 +- CHANGELOG.md | 3 +++ lib/logstash/inputs/elasticsearch.rb | 18 +++++++----------- logstash-input-elasticsearch.gemspec | 4 ++-- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 
ae3b17c1..1ba5da93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,4 +15,4 @@ env: - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true LOG_LEVEL=info - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x LOG_LEVEL=info - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3 -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.15.0 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.14.1 LOG_LEVEL=info +- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.16.3 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.14.1 LOG_LEVEL=info diff --git a/CHANGELOG.md b/CHANGELOG.md index f1bf557e..0cc2c42c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.14.0 + - Refactor: switch to using scheduler mixin [#177](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/177) + ## 4.13.0 - Added support for `ca_trusted_fingerprint` when run on Logstash 8.3+ [#178](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/178) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 196fab31..8bf15cd4 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -8,6 +8,7 @@ require 'logstash/plugin_mixins/ecs_compatibility_support' require 'logstash/plugin_mixins/ecs_compatibility_support/target_check' require 'logstash/plugin_mixins/ca_trusted_fingerprint_support' +require "logstash/plugin_mixins/scheduler" require "base64" require "elasticsearch" @@ -78,6 +79,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base extend LogStash::PluginMixins::ValidatorSupport::FieldReferenceValidationAdapter + include LogStash::PluginMixins::Scheduler + config_name "elasticsearch" # List of elasticsearch hosts to use for querying. @@ -251,21 +254,13 @@ def register def run(output_queue) if @schedule - @scheduler = Rufus::Scheduler.new(:max_work_threads => 1) - @scheduler.cron @schedule do - do_run(output_queue) - end - - @scheduler.join + scheduler.cron(@schedule) { do_run(output_queue) } + scheduler.join else do_run(output_queue) end end - def stop - @scheduler.stop if @scheduler - end - private def do_run(output_queue) @@ -274,9 +269,10 @@ def do_run(output_queue) logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 + pipeline_id = execution_context&.pipeline_id || 'main' @slices.times.map do |slice_id| Thread.new do - LogStash::Util::set_thread_name("#{@id}_slice_#{slice_id}") + LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}") do_run_slice(output_queue, slice_id) end end.map(&:join) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d1e623a1..70add805 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.13.0' + s.version = '4.14.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -24,13 +24,13 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3' s.add_runtime_dependency 'logstash-mixin-event_support', '~> 1.0' s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' + s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0' s.add_runtime_dependency 'elasticsearch', '>= 7.17.1' s.add_runtime_dependency 'logstash-mixin-ca_trusted_fingerprint_support', '~> 1.0' s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' - s.add_runtime_dependency 'rufus-scheduler' s.add_runtime_dependency 'manticore', ">= 0.7.1" s.add_development_dependency 'logstash-codec-plain' From 87ab7c95c830eee5a57d663d6d1f90e92a21c5be Mon Sep 17 00:00:00 2001 From: kaisecheng <69120390+kaisecheng@users.noreply.github.com> Date: Mon, 8 Aug 2022 16:44:20 +0100 Subject: [PATCH 146/207] add retry for query (#179) This commit adds `retries` option for query. After all retries fail, log an error msg Co-authored-by: Ry Biesemeyer Co-authored-by: Karen Metts <35154725+karenzone@users.noreply.github.com> --- CHANGELOG.md | 3 + docs/index.asciidoc | 13 ++ lib/logstash/helpers/loggable_try.rb | 18 ++ lib/logstash/inputs/elasticsearch.rb | 66 ++++++-- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 242 ++++++++++++++++++++------- 6 files changed, 277 insertions(+), 67 deletions(-) create mode 100644 lib/logstash/helpers/loggable_try.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cc2c42c..ea8e69f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.15.0 + - Feat: add `retries` option. allow retry for failing query [#179](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/179) + ## 4.14.0 - Refactor: switch to using scheduler mixin [#177](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/177) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index d5ea9582..2f004db2 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -124,6 +124,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> | <>|No | <> | {logstash-ref}/field-references-deepdive.html[field reference] | No +| <> | <>|No | <> |<>|No |======================================================================= @@ -339,6 +340,17 @@ The maximum amount of time, in seconds, for a single request to Elasticsearch. Request timeouts tend to occur when an individual page of data is very large, such as when it contains large-payload documents and/or the <> has been specified as a large value. + +[id="plugins-{type}s-{plugin}-retries"] +===== `retries` + +* Value type is <> +* Default value is `0` + +The number of times to re-run the query after the first failure. If the query fails after all retries, it logs an error message. +The default is 0 (no retry). This value should be equal to or greater than zero. + + [id="plugins-{type}s-{plugin}-schedule"] ===== `schedule` @@ -424,6 +436,7 @@ When the `target` is set to a field reference, the `_source` of the hit is place This option can be useful to avoid populating unknown fields when a downstream schema such as ECS is enforced. It is also possible to target an entry in the event's metadata, which will be available during event processing but not exported to your outputs (e.g., `target \=> "[@metadata][_source]"`). 
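+
+As an illustrative sketch (host, query, and cron values are assumptions, not
+defaults), the `retries` option described above can be paired with a schedule so
+that a transient failure costs at most a single scheduled run:
+
+[source,ruby]
+----
+input {
+  elasticsearch {
+    hosts    => ["localhost:9200"]
+    query    => '{ "query": { "match_all": {} } }'
+    retries  => 2                 # re-run a failing query up to two more times
+    schedule => "*/30 * * * *"    # run every 30 minutes
+  }
+}
+----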
+ [id="plugins-{type}s-{plugin}-user"] ===== `user` diff --git a/lib/logstash/helpers/loggable_try.rb b/lib/logstash/helpers/loggable_try.rb new file mode 100644 index 00000000..d8030bac --- /dev/null +++ b/lib/logstash/helpers/loggable_try.rb @@ -0,0 +1,18 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License; +# you may not use this file except in compliance with the Elastic License. + +require 'stud/try' + +module LogStash module Helpers + class LoggableTry < Stud::Try + def initialize(logger, name) + @logger = logger + @name = name + end + + def log_failure(exception, fail_count, message) + @logger.warn("Attempt to #{@name} but failed. #{message}", fail_count: fail_count, exception: exception.message) + end + end +end end \ No newline at end of file diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 8bf15cd4..f44d6d16 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -10,6 +10,7 @@ require 'logstash/plugin_mixins/ca_trusted_fingerprint_support' require "logstash/plugin_mixins/scheduler" require "base64" +require 'logstash/helpers/loggable_try' require "elasticsearch" require "elasticsearch/transport/transport/http/manticore" @@ -99,6 +100,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # This allows you to set the maximum number of hits returned per scroll. config :size, :validate => :number, :default => 1000 + # The number of retries to run the query. If the query fails after all retries, it logs an error message. + config :retries, :validate => :number, :default => 0 + # This parameter controls the keepalive time in seconds of the scrolling # request and initiates the scrolling process. The timeout applies per # round trip (i.e. between the previous scroll request, to the next). @@ -221,6 +225,8 @@ def register @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`") end + @retries < 0 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `retries` option must be equal or greater than zero, got `#{@retries}`") + validate_authentication fill_user_password_from_cloud_auth fill_hosts_from_cloud_id @@ -262,22 +268,67 @@ def run(output_queue) end private - + JOB_NAME = "run query" def do_run(output_queue) # if configured to run a single slice, don't bother spinning up threads - return do_run_slice(output_queue) if @slices.nil? || @slices <= 1 + if @slices.nil? || @slices <= 1 + success, events = retryable_slice + success && events.each { |event| output_queue << event } + return + end logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 + slice_results = parallel_slice # array of tuple(ok, events) + + # insert events to queue if all slices success + if slice_results.all?(&:first) + slice_results.flat_map { |success, events| events } + .each { |event| output_queue << event } + end + + logger.trace("#{@slices} slices completed") + end + + def retryable(job_name, &block) + begin + stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) + output = stud_try.try((@retries + 1).times) { yield } + [true, output] + rescue => e + error_details = {:message => e.message, :cause => e.cause} + error_details[:backtrace] = e.backtrace if logger.debug? 
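+      # all (@retries + 1) attempts failed: log the error and return ok=false
+      # so the caller can discard this query's partial results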
+      logger.error("Tried #{job_name} unsuccessfully", error_details)
+      [false, nil]
+    end
+  end
+
+
+  # @return [(ok, events)] : Array of tuple(Boolean, [Logstash::Event])
+  def parallel_slice
     pipeline_id = execution_context&.pipeline_id || 'main'
     @slices.times.map do |slice_id|
       Thread.new do
         LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}")
-        do_run_slice(output_queue, slice_id)
+        retryable_slice(slice_id)
       end
-    end.map(&:join)
+    end.map do |t|
+      t.join
+      t.value
+    end
   end
 
+  # @param slice_id [Integer]
+  # @return (ok, events) [Boolean, Array(Logstash::Event)]
+  def retryable_slice(slice_id=nil)
+    retryable(JOB_NAME) do
+      output = []
+      do_run_slice(output, slice_id)
+      output
+    end
+  end
+
+
   def do_run_slice(output_queue, slice_id=nil)
     slice_query = @base_query
     slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil?
@@ -314,11 +365,6 @@ def process_next_scroll(output_queue, scroll_id)
     r = scroll_request(scroll_id)
     r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
     [r['hits']['hits'].any?, r['_scroll_id']]
-  rescue => e
-    # this will typically be triggered by a scroll timeout
-    logger.error("Scroll request error, aborting scroll", message: e.message, exception: e.class)
-    # return no hits and original scroll_id so we can try to clear it
-    [false, scroll_id]
   end
 
   def push_hit(hit, output_queue)
@@ -353,7 +399,7 @@ def clear_scroll(scroll_id)
     logger.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class)
   end
 
-  def scroll_request scroll_id
+  def scroll_request(scroll_id)
     @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll)
   end
 
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 70add805..2e560fd2 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.14.0'
+  s.version = '4.15.0'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname.
This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index e7f8948b..a86a3719 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -51,6 +51,17 @@ expect { plugin.register }.to raise_error(LogStash::ConfigurationError) end end + + context "retry" do + let(:config) do + { + "retries" => -1 + } + end + it "should raise an exception with negative number" do + expect { plugin.register }.to raise_error(LogStash::ConfigurationError) + end + end end it_behaves_like "an interruptible input plugin" do @@ -193,7 +204,7 @@ context 'with `slices => 1`' do let(:slices) { 1 } it 'runs just one slice' do - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), nil) expect(Thread).to_not receive(:new) plugin.register @@ -204,7 +215,7 @@ context 'without slices directive' do let(:config) { super().tap { |h| h.delete('slices') } } it 'runs just one slice' do - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), nil) expect(Thread).to_not receive(:new) plugin.register @@ -315,7 +326,6 @@ end # END SLICE 1 - let(:client) { Elasticsearch::Client.new } # RSpec mocks validations are not threadsafe. # Allow caller to synchronize. @@ -329,69 +339,112 @@ def synchronize_method!(object, method_name) end end - before(:each) do - expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) - plugin.register + describe "with normal response" do + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + plugin.register - expect(client).to receive(:clear_scroll).and_return(nil) + expect(client).to receive(:clear_scroll).and_return(nil) - # SLICE0 is a three-page scroll in which the last page is empty - slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) - expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) - expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) - expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) - allow(client).to receive(:ping) + # SLICE0 is a three-page scroll in which the last page is empty + slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) + allow(client).to receive(:ping) - # SLICE1 is a two-page scroll in which the last page has no next scroll id - slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) - expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) - expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) + # SLICE1 is a two-page scroll in which the last page has no next scroll id + slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) + expect(client).to 
receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) - synchronize_method!(plugin, :scroll_request) - synchronize_method!(plugin, :search_request) - end + synchronize_method!(plugin, :scroll_request) + synchronize_method!(plugin, :search_request) + end - let(:emitted_events) do - queue = Queue.new # since we are running slices in threads, we need a thread-safe queue. - plugin.run(queue) - events = [] - events << queue.pop until queue.empty? - events - end + let(:client) { Elasticsearch::Client.new } - let(:emitted_event_ids) do - emitted_events.map { |event| event.get('[@metadata][_id]') } - end + let(:emitted_events) do + queue = Queue.new # since we are running slices in threads, we need a thread-safe queue. + plugin.run(queue) + events = [] + events << queue.pop until queue.empty? + events + end - it 'emits the hits on the first page of the first slice' do - expect(emitted_event_ids).to include('slice0-response0-item0') - expect(emitted_event_ids).to include('slice0-response0-item1') - end - it 'emits the hits on the second page of the first slice' do - expect(emitted_event_ids).to include('slice0-response1-item0') - end + let(:emitted_event_ids) do + emitted_events.map { |event| event.get('[@metadata][_id]') } + end - it 'emits the hits on the first page of the second slice' do - expect(emitted_event_ids).to include('slice1-response0-item0') - expect(emitted_event_ids).to include('slice1-response0-item1') - end + it 'emits the hits on the first page of the first slice' do + expect(emitted_event_ids).to include('slice0-response0-item0') + expect(emitted_event_ids).to include('slice0-response0-item1') + end + it 'emits the hits on the second page of the first slice' do + expect(emitted_event_ids).to include('slice0-response1-item0') + end - it 'emits the hitson the second page of the second slice' do - expect(emitted_event_ids).to include('slice1-response1-item0') - expect(emitted_event_ids).to include('slice1-response1-item1') - end + it 'emits the hits on the first page of the second slice' do + expect(emitted_event_ids).to include('slice1-response0-item0') + expect(emitted_event_ids).to include('slice1-response0-item1') + end + + it 'emits the hits on the second page of the second slice' do + expect(emitted_event_ids).to include('slice1-response1-item0') + expect(emitted_event_ids).to include('slice1-response1-item1') + end + + it 'does not double-emit' do + expect(emitted_event_ids.uniq).to eq(emitted_event_ids) + end - it 'does not double-emit' do - expect(emitted_event_ids.uniq).to eq(emitted_event_ids) + it 'emits events with appropriate fields' do + emitted_events.each do |event| + expect(event).to be_a(LogStash::Event) + expect(event.get('message')).to eq(['hello, world']) + expect(event.get('[@metadata][_id]')).to_not be_nil + expect(event.get('[@metadata][_id]')).to_not be_empty + expect(event.get('[@metadata][_index]')).to start_with('logstash-') + end + end end - it 'emits events with appropriate fields' do - emitted_events.each do |event| - expect(event).to be_a(LogStash::Event) - expect(event.get('message')).to eq(['hello, world']) - expect(event.get('[@metadata][_id]')).to_not be_nil - expect(event.get('[@metadata][_id]')).to_not be_empty - expect(event.get('[@metadata][_index]')).to start_with('logstash-') + describe "with scroll request fail" do + before(:each) do + expect(Elasticsearch::Client).to 
receive(:new).with(any_args).and_return(client) + plugin.register + + expect(client).to receive(:clear_scroll).and_return(nil) + + # SLICE0 is a three-page scroll in which the second page throw exception + slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_raise("boom") + allow(client).to receive(:ping) + + # SLICE1 is a two-page scroll in which the last page has no next scroll id + slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) + expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) + + synchronize_method!(plugin, :scroll_request) + synchronize_method!(plugin, :search_request) + end + + let(:client) { Elasticsearch::Client.new } + + it 'does not insert event to queue' do + expect(plugin).to receive(:parallel_slice).and_wrap_original do |m, *args| + slice0, slice1 = m.call + expect(slice0[0]).to be_falsey + expect(slice1[0]).to be_truthy + expect(slice1[1].size).to eq(4) # four items from SLICE1 + [slice0, slice1] + end + + queue = Queue.new + plugin.run(queue) + expect(queue.size).to eq(0) end end end @@ -890,16 +943,93 @@ def wait_receive_request queue << LogStash::Event.new({}) }.at_least(:twice) runner = Thread.start { plugin.run(queue) } - sleep 3.0 + expect(queue.pop).not_to be_nil + cron_jobs = plugin.instance_variable_get(:@_scheduler).instance_variable_get(:@impl).jobs + expect(cron_jobs[0].next_time - cron_jobs[0].last_time).to be <= 5.0 + expect(queue.pop).not_to be_nil ensure plugin.do_stop runner.join if runner end - expect(queue.size).to be >= 2 end end + context "retries" do + let(:mock_response) do + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } ] + } + } + end + + let(:mock_scroll_response) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + end + + before(:each) do + client = Elasticsearch::Client.new + allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + allow(client).to receive(:search).with(any_args).and_return(mock_response) + allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response) + allow(client).to receive(:clear_scroll).and_return(nil) + allow(client).to receive(:ping) + end + + let(:config) do + { + "hosts" => ["localhost"], + "query" => '{ "query": { "match": { "city_name": "Okinawa" } }, "fields": ["message"] }', + "retries" => 1 + } + end + + it "retry and log error when all search request fail" do + expect(plugin.logger).to receive(:error).with(/Tried .* unsuccessfully/, + hash_including(:message => 'Manticore::UnknownException')) + expect(plugin.logger).to receive(:warn).twice.with(/Attempt to .* but failed/, + hash_including(:exception => "Manticore::UnknownException")) + expect(plugin).to 
receive(:search_request).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice) + + plugin.register + + expect{ plugin.run(queue) }.not_to raise_error + expect(queue.size).to eq(0) + end + + it "retry successfully when search request fail for one time" do + expect(plugin.logger).to receive(:warn).once.with(/Attempt to .* but failed/, + hash_including(:exception => "Manticore::UnknownException")) + expect(plugin).to receive(:search_request).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException) + expect(plugin).to receive(:search_request).with(instance_of(Hash)).once.and_call_original + + plugin.register + + expect{ plugin.run(queue) }.not_to raise_error + expect(queue.size).to eq(1) + end + end + # @note can be removed once we depends on elasticsearch gem >= 6.x def extract_transport(client) # on 7.x client.transport is a ES::Transport::Client client.transport.respond_to?(:transport) ? client.transport.transport : client.transport From 9d50d62de0a83a51d0aed4a35a52915f585488c1 Mon Sep 17 00:00:00 2001 From: mashhur Date: Wed, 10 Aug 2022 10:42:11 -0700 Subject: [PATCH 147/207] An option to control SSL certificate validation added. --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 11 +++++++++++ lib/logstash/inputs/elasticsearch.rb | 10 ++++++++++ spec/inputs/elasticsearch_spec.rb | 8 ++++++++ 4 files changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea8e69f3..e00ce085 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.16.0 + - Added `ssl_certificate_verification` option to control SSL certificate verification [#N](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/N) + ## 4.15.0 - Feat: add `retries` option. allow retry for failing query [#179](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/179) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 2f004db2..7998447d 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -122,6 +122,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> | <>|No | <> | {logstash-ref}/field-references-deepdive.html[field reference] | No | <> | <>|No @@ -414,6 +415,16 @@ instructions into the query. If enabled, SSL will be used when communicating with the Elasticsearch server (i.e. HTTPS will be used instead of plain HTTP). +[id="plugins-{type}s-{plugin}-ssl_certificate_verification"] +===== `ssl_certificate_verification` + +* Value type is <> +* Default value is `true` + +Option to validate the server's certificate. Disabling this severely compromises security. +For more information on disabling certificate verification please read +https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf + [id="plugins-{type}s-{plugin}-socket_timeout_seconds"] ===== `socket_timeout_seconds` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index f44d6d16..e727457c 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -190,6 +190,11 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary config :ca_file, :validate => :path + # Option to validate the server's certificate. Disabling this severely compromises security. 
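+  # Disabling only takes effect together with `ssl => true`; a sketch of the
+  # pairing (host value assumed):
+  #
+  #     input {
+  #       elasticsearch {
+  #         hosts => ["https://localhost:9200"]
+  #         ssl => true
+  #         ssl_certificate_verification => false
+  #       }
+  #     }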
+ # For more information on disabling certificate verification please read + # https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf + config :ssl_certificate_verification, :validate => :boolean, :default => true + # Schedule of when to periodically run statement, in Cron format # for example: "* * * * *" (execute query every minute, on the minute) # @@ -432,6 +437,11 @@ def setup_ssl ssl_options[:ssl] = true if @ssl ssl_options[:ca_file] = @ca_file if @ssl && @ca_file ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint + if @ssl && !@ssl_certificate_verification + logger.warn "You have enabled encryption but DISABLED certificate verification, " + + "to make sure your data is secure remove `ssl_certificate_verification => false`" + ssl_options[:verify] = :disable + end ssl_options end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index a86a3719..8004bd83 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -698,6 +698,14 @@ def synchronize_method!(object, method_name) expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Multiple authentication options are specified/ end end + + context 'ssl verification disabled' do + let(:config) { super().merge({ 'ssl_certificate_verification' => false }) } + it 'should warn data security risk' do + expect(plugin.logger).to receive(:warn).once.with("You have enabled encryption but DISABLED certificate verification, to make sure your data is secure remove `ssl_certificate_verification => false`") + plugin.register + end + end end end if LOGSTASH_VERSION > '6.0' From f513b4468e7f3f7dde2288d18d0deb788163be76 Mon Sep 17 00:00:00 2001 From: mashhur Date: Wed, 10 Aug 2022 10:51:23 -0700 Subject: [PATCH 148/207] Add the PR number in the changelog. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e00ce085..9a973686 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ ## 4.16.0 - - Added `ssl_certificate_verification` option to control SSL certificate verification [#N](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/N) + - Added `ssl_certificate_verification` option to control SSL certificate verification [#180](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/180) ## 4.15.0 - Feat: add `retries` option. allow retry for failing query [#179](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/179) From 8d64da00b9d5934969887f7ff56176902c70f29f Mon Sep 17 00:00:00 2001 From: Mashhur <99575341+mashhurs@users.noreply.github.com> Date: Wed, 10 Aug 2022 13:58:39 -0700 Subject: [PATCH 149/207] Security abuse scenarios explanation added. Co-authored-by: Ry Biesemeyer --- docs/index.asciidoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 7998447d..e619f3eb 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -422,6 +422,10 @@ server (i.e. HTTPS will be used instead of plain HTTP). * Default value is `true` Option to validate the server's certificate. Disabling this severely compromises security. +When certificate validation is disabled this plugin implicitly trusts the machine +resolved at the given address without validating its proof-of-identity, which can +lead to this plugin transmitting credentials to or processing data from an untrustworthy +man-in-the-middle or other compromised infrastructure. 
For more information on disabling certificate verification please read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf From b0521379742ceee0e2ecc3c5b84df906a0456b99 Mon Sep 17 00:00:00 2001 From: Mashhur <99575341+mashhurs@users.noreply.github.com> Date: Thu, 11 Aug 2022 10:43:50 -0700 Subject: [PATCH 150/207] Apply suggestions from code review Co-authored-by: Karen Metts <35154725+karenzone@users.noreply.github.com> --- docs/index.asciidoc | 10 +++++----- lib/logstash/inputs/elasticsearch.rb | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index e619f3eb..d883c658 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -422,12 +422,12 @@ server (i.e. HTTPS will be used instead of plain HTTP). * Default value is `true` Option to validate the server's certificate. Disabling this severely compromises security. -When certificate validation is disabled this plugin implicitly trusts the machine -resolved at the given address without validating its proof-of-identity, which can -lead to this plugin transmitting credentials to or processing data from an untrustworthy +When certificate validation is disabled, this plugin implicitly trusts the machine +resolved at the given address without validating its proof-of-identity. +In this scenario, the plugin can transmit credentials to or process data from an untrustworthy man-in-the-middle or other compromised infrastructure. -For more information on disabling certificate verification please read -https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf +More information on the importance of certificate verification: +**https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf**. [id="plugins-{type}s-{plugin}-socket_timeout_seconds"] ===== `socket_timeout_seconds` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index e727457c..7edcf727 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -191,7 +191,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :ca_file, :validate => :path # Option to validate the server's certificate. Disabling this severely compromises security. - # For more information on disabling certificate verification please read + # For more information on the importance of certificate verification please read # https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf config :ssl_certificate_verification, :validate => :boolean, :default => true From e1bf988de09700b3c965200154f39665131e857d Mon Sep 17 00:00:00 2001 From: mashhur Date: Fri, 12 Aug 2022 08:57:24 -0700 Subject: [PATCH 151/207] Bump gem version missed by #180 PR. --- logstash-input-elasticsearch.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 2e560fd2..b7aeefb0 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.15.0' + s.version = '4.16.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" From f461a59af9c8ba911e4922f26ee1b9b099574cb4 Mon Sep 17 00:00:00 2001 From: Edmo Vamerlatti Costa <11836452+edmocosta@users.noreply.github.com> Date: Tue, 7 Mar 2023 16:28:25 +0100 Subject: [PATCH 152/207] Fix docker-setup and jruby bundler for test (#186) - Use the bundler in `/vendor/bundle/jruby/2.6.0/bin/bundle` instead of `/vendor/jruby/bin/bundle` to run test, as Logstash doesn't package the latter in artifacts in https://github.com/elastic/logstash/pull/14667 - Fixed docker-setup.sh so it can download the snapshots images --- .ci/docker-setup.sh | 45 ++++++++------------------------------------- .ci/logstash-run.sh | 4 ++-- 2 files changed, 10 insertions(+), 39 deletions(-) diff --git a/.ci/docker-setup.sh b/.ci/docker-setup.sh index 7145a5aa..6b6aa730 100755 --- a/.ci/docker-setup.sh +++ b/.ci/docker-setup.sh @@ -4,44 +4,16 @@ # Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable. set -e +VERSION_URL="https://raw.githubusercontent.com/elastic/logstash/master/ci/logstash_releases.json" -download_and_load_docker_snapshot_artifact() { - project="${1?project name required}" - - artifact_type="docker-image" - artifact_name_base="${project}${DISTRIBUTION_SUFFIX}-${ELASTIC_STACK_VERSION}-${artifact_type}" - echo "Downloading snapshot docker image: ${project}${DISTRIBUTION_SUFFIX} (${ELASTIC_STACK_VERSION})" - - artifact_name_noarch="${artifact_name_base}.tar.gz" - artifact_name_arch="${artifact_name_base}-x86_64.tar.gz" - - jq_extract_artifact_url=".build.projects.\"${project}\".packages | (.\"${artifact_name_noarch}\" // .\"${artifact_name_arch}\") | .url" - - artifact_list=$(curl --silent "https://artifacts-api.elastic.co/v1/versions/$%7BELASTIC_STACK_VERSION%7D/builds/latest") - artifact_url=$(echo "${artifact_list}" | jq --raw-output "${jq_extract_artifact_url}") - - if [[ "${artifact_url}" == "null" ]]; then - echo "Failed to find '${artifact_name_noarch}'" - echo "Failed to find '${artifact_name_arch}'" - echo "Listing:" - echo "${artifact_list}" | jq --raw-output ".build.projects.\"${project}\".packages | keys | map(select(contains(\"${artifact_type}\")))" - return 1 - fi - - echo "${artifact_url}" - cd /tmp - curl "${artifact_url}" > "${project}-docker-image.tar.gz" - tar xfvz "${project}-docker-image.tar.gz" repositories - echo "Loading ${project} docker image: " - cat repositories - docker load < "${project}-docker-image.tar.gz" - rm "${project}-docker-image.tar.gz" - cd - +pull_docker_snapshot() { + project="${1?project name required}" + local docker_image="docker.elastic.co/${project}/${project}:${ELASTIC_STACK_VERSION}" + echo "Pulling $docker_image" + docker pull "$docker_image" } -VERSION_URL="https://raw.githubusercontent.com/elastic/logstash/master/ci/logstash_releases.json" - if [ "$ELASTIC_STACK_VERSION" ]; then echo "Fetching versions from $VERSION_URL" VERSIONS=$(curl --silent $VERSION_URL) @@ -62,9 +34,9 @@ if [ "$ELASTIC_STACK_VERSION" ]; then echo "Testing against version: $ELASTIC_STACK_VERSION" if [[ "$ELASTIC_STACK_VERSION" = *"-SNAPSHOT" ]]; then - download_and_load_docker_snapshot_artifact "logstash" + pull_docker_snapshot "logstash" if [ "$INTEGRATION" == "true" ]; then - download_and_load_docker_snapshot_artifact "elasticsearch" + pull_docker_snapshot "elasticsearch" fi fi @@ -86,4 +58,3 @@ else echo "For example: export ELASTIC_STACK_VERSION=6.2.4" exit 1 fi - diff --git a/.ci/logstash-run.sh b/.ci/logstash-run.sh index a0553960..61892fbe 100755 
--- a/.ci/logstash-run.sh +++ b/.ci/logstash-run.sh @@ -26,7 +26,7 @@ wait_for_es() { } if [[ "$INTEGRATION" != "true" ]]; then - jruby -rbundler/setup -S rspec -fd --tag ~integration --tag ~secure_integration spec/inputs + bundle exec rspec --format=documentation spec/inputs --tag ~integration --tag ~secure_integration else if [[ "$SECURE_INTEGRATION" == "true" ]]; then extra_tag_args="--tag secure_integration" @@ -37,5 +37,5 @@ else echo "Waiting for elasticsearch to respond..." wait_for_es echo "Elasticsearch is Up !" - jruby -rbundler/setup -S rspec -fd $extra_tag_args --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration + bundle exec rspec --format=documentation $extra_tag_args --tag update_tests:painless --tag es_version:$ELASTIC_STACK_VERSION spec/inputs/integration fi From b37536abbfa013bb94f1b072dbab0a42ad1e82e6 Mon Sep 17 00:00:00 2001 From: Edmo Vamerlatti Costa <11836452+edmocosta@users.noreply.github.com> Date: Fri, 10 Mar 2023 17:44:26 +0100 Subject: [PATCH 153/207] Standardize and add SSL settings (#185) This commit made the plugin SSL settings consistent with the naming convention defined in the meta issue: https://github.com/elastic/logstash/issues/14905. It added the following SSL settings: ssl_enabled: Enable/disable the SSL settings. Infer the value from the hosts' scheme if neither the deprecated `:ssl` nor the new `:ssl_enabled` configs were set ssl_certificate: OpenSSL-style X.509 certificate file to authenticate the client ssl_key: OpenSSL-style RSA private key that corresponds to the ssl_certificate ssl_truststore_path: The JKS truststore to validate the server's certificate ssl_truststore_type: The format of the truststore file ssl_truststore_password: The truststore password ssl_keystore_path: The keystore used to present a certificate to the server ssl_keystore_type: The format of the keystore file ssl_keystore_password: The keystore password ssl_cipher_suites: The list of cipher suites to use ssl_supported_protocols: Supported protocols with versions And deprecated: ssl in favor of ssl_enabled: ca_file in favor of ssl_certificate_authorities ssl_certificate_verification in favor of ssl_verification_mode --- CHANGELOG.md | 18 ++ docs/index.asciidoc | 219 +++++++++++++-- lib/logstash/inputs/elasticsearch.rb | 181 ++++++++++-- logstash-input-elasticsearch.gemspec | 3 +- spec/inputs/elasticsearch_spec.rb | 6 +- spec/inputs/elasticsearch_ssl_spec.rb | 265 ++++++++++++++++++ spec/inputs/integration/elasticsearch_spec.rb | 8 +- 7 files changed, 645 insertions(+), 55 deletions(-) create mode 100644 spec/inputs/elasticsearch_ssl_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a973686..fc7c7638 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,21 @@ +## 4.17.0 + - Added SSL settings for: [#185](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/185) + - `ssl_enabled`: Enable/disable the SSL settings. 
If not provided, the value is inferred from the hosts scheme + - `ssl_certificate`: OpenSSL-style X.509 certificate file to authenticate the client + - `ssl_key`: OpenSSL-style RSA private key that corresponds to the `ssl_certificate` + - `ssl_truststore_path`: The JKS truststore to validate the server's certificate + - `ssl_truststore_type`: The format of the truststore file + - `ssl_truststore_password`: The truststore password + - `ssl_keystore_path`: The keystore used to present a certificate to the server + - `ssl_keystore_type`: The format of the keystore file + - `ssl_keystore_password`: The keystore password + - `ssl_cipher_suites`: The list of cipher suites to use + - `ssl_supported_protocols`: Supported protocols with versions + - Reviewed and deprecated SSL settings to comply with Logstash's naming convention + - Deprecated `ssl` in favor of `ssl_enabled` + - Deprecated `ca_file` in favor of `ssl_certificate_authorities` + - Deprecated `ssl_certificate_verification` in favor of `ssl_verification_mode` + ## 4.16.0 - Added `ssl_certificate_verification` option to control SSL certificate verification [#180](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/180) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index d883c658..c44ab493 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -96,13 +96,12 @@ TIP: Set the `target` option to avoid potential schema conflicts. [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input configuration options -This plugin supports the following configuration options plus the <> described later. +This plugin supports the following configuration options plus the <> and the <> described later. [cols="<,<,<",options="header",] |======================================================================= |Setting |Input type|Required | <> |<>|No -| <> |a valid filesystem path|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -121,8 +120,19 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No -| <> |<>|No -| <> |<>|No +| <> |<>|No +| <> |list of <>|No +| <> |list of <>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>|No +| <> |<>, one of `["full", "none"]`|No | <> | <>|No | <> | {logstash-ref}/field-references-deepdive.html[field reference] | No | <> | <>|No @@ -140,21 +150,13 @@ input plugins. * Value type is <> * There is no default value for this setting. -Authenticate using Elasticsearch API key. Note that this option also requires enabling the `ssl` option. +Authenticate using Elasticsearch API key. Note that this option also requires enabling the <> option. Format is `id:api_key` where `id` and `api_key` are as returned by the Elasticsearch {ref}/security-api-create-api-key.html[Create API key API]. -[id="plugins-{type}s-{plugin}-ca_file"] -===== `ca_file` - - * Value type is <> - * There is no default value for this setting. - -SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary. - [id="plugins-{type}s-{plugin}-ca_trusted_fingerprint"] ===== `ca_trusted_fingerprint` @@ -406,28 +408,138 @@ NOTE: The Elasticsearch manual indicates that there can be _negative_ performanc If the `slices` parameter is left unset, the plugin will _not_ inject slice instructions into the query. 
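For example, a minimal sliced-scroll pipeline could look like the sketch below (the hosts, index, and query values are illustrative placeholders, not taken from this patch):

[source,ruby]
----
input {
  elasticsearch {
    hosts  => ["localhost:9200"]
    index  => "logstash-*"
    query  => '{ "query": { "match_all": {} } }'
    slices => 2  # one scroll consumer per slice; values well above the shard count tend to add overhead
  }
}
----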
-[id="plugins-{type}s-{plugin}-ssl"] -===== `ssl` +[id="plugins-{type}s-{plugin}-ssl_certificate"] +===== `ssl_certificate` + * Value type is <> + * There is no default value for this setting. + +SSL certificate to use to authenticate the client. This certificate should be an OpenSSL-style X.509 certificate file. + +NOTE: This setting can be used only if <> is set. + +[id="plugins-{type}s-{plugin}-ssl_certificate_authorities"] +===== `ssl_certificate_authorities` + + * Value type is a list of <> + * There is no default value for this setting + +The `.cer` or `.pem` files to validate the server's certificate. + +NOTE: You cannot use this setting and <> at the same time. + +[id="plugins-{type}s-{plugin}-ssl_cipher_suites"] +===== `ssl_cipher_suites` + * Value type is a list of <> + * There is no default value for this setting + +The list of cipher suites to use, listed by priorities. +Supported cipher suites vary depending on the Java and protocol versions. + +[id="plugins-{type}s-{plugin}-ssl_enabled"] +===== `ssl_enabled` * Value type is <> - * Default value is `false` + * There is no default value for this setting. -If enabled, SSL will be used when communicating with the Elasticsearch -server (i.e. HTTPS will be used instead of plain HTTP). +Enable SSL/TLS secured communication to Elasticsearch cluster. +Leaving this unspecified will use whatever scheme is specified in the URLs listed in <> or extracted from the <>. +If no explicit protocol is specified plain HTTP will be used. -[id="plugins-{type}s-{plugin}-ssl_certificate_verification"] -===== `ssl_certificate_verification` +[id="plugins-{type}s-{plugin}-ssl_key"] +===== `ssl_key` + * Value type is <> + * There is no default value for this setting. -* Value type is <> -* Default value is `true` +OpenSSL-style RSA private key that corresponds to the <>. -Option to validate the server's certificate. Disabling this severely compromises security. -When certificate validation is disabled, this plugin implicitly trusts the machine -resolved at the given address without validating its proof-of-identity. -In this scenario, the plugin can transmit credentials to or process data from an untrustworthy -man-in-the-middle or other compromised infrastructure. -More information on the importance of certificate verification: -**https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf**. +NOTE: This setting can be used only if <> is set. + +[id="plugins-{type}s-{plugin}-ssl_keystore_password"] +===== `ssl_keystore_password` + + * Value type is <> + * There is no default value for this setting. + +Set the keystore password + +[id="plugins-{type}s-{plugin}-ssl_keystore_path"] +===== `ssl_keystore_path` + + * Value type is <> + * There is no default value for this setting. + +The keystore used to present a certificate to the server. +It can be either `.jks` or `.p12` + +NOTE: You cannot use this setting and <> at the same time. + +[id="plugins-{type}s-{plugin}-ssl_keystore_type"] +===== `ssl_keystore_type` + + * Value can be any of: `jks`, `pkcs12` + * If not provided, the value will be inferred from the keystore filename. + +The format of the keystore file. It must be either `jks` or `pkcs12`. + +[id="plugins-{type}s-{plugin}-ssl_supported_protocols"] +===== `ssl_supported_protocols` + + * Value type is <> + * Allowed values are: `'TLSv1.1'`, `'TLSv1.2'`, `'TLSv1.3'` + * Default depends on the JDK being used. With up-to-date Logstash, the default is `['TLSv1.2', 'TLSv1.3']`. + `'TLSv1.1'` is not considered secure and is only provided for legacy applications. 
+ +List of allowed SSL/TLS versions to use when establishing a connection to the Elasticsearch cluster. + +For Java 8 `'TLSv1.3'` is supported only since **8u262** (AdoptOpenJDK), but requires that you set the +`LS_JAVA_OPTS="-Djdk.tls.client.protocols=TLSv1.3"` system property in Logstash. + +NOTE: If you configure the plugin to use `'TLSv1.1'` on any recent JVM, such as the one packaged with Logstash, +the protocol is disabled by default and needs to be enabled manually by changing `jdk.tls.disabledAlgorithms` in +the *$JDK_HOME/conf/security/java.security* configuration file. That is, `TLSv1.1` needs to be removed from the list. + +[id="plugins-{type}s-{plugin}-ssl_truststore_password"] +===== `ssl_truststore_password` + + * Value type is <> + * There is no default value for this setting. + +Set the truststore password. + +[id="plugins-{type}s-{plugin}-ssl_truststore_path"] +===== `ssl_truststore_path` + + * Value type is <> + * There is no default value for this setting. + +The truststore to validate the server's certificate. +It can be either .jks or .p12. + +NOTE: You cannot use this setting and <> at the same time. + +[id="plugins-{type}s-{plugin}-ssl_truststore_type"] +===== `ssl_truststore_type` + + * Value can be any of: `jks`, `pkcs12` + * If not provided, the value will be inferred from the truststore filename. + +The format of the truststore file. It must be either `jks` or `pkcs12`. + +[id="plugins-{type}s-{plugin}-ssl_verification_mode"] +===== `ssl_verification_mode` + + * Value can be any of: `full`, `none` + * Default value is `full` + +Defines how to verify the certificates presented by another party in the TLS connection: + +`full` validates that the server certificate has an issue date that’s within +the not_before and not_after dates; chains to a trusted Certificate Authority (CA), and +has a hostname or IP address that matches the names within the certificate. + +`none` performs no certificate validation. + +WARNING: Setting certificate verification to `none` disables many security benefits of SSL/TLS, which is very dangerous. For more information on disabling certificate verification please read https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf [id="plugins-{type}s-{plugin}-socket_timeout_seconds"] ===== `socket_timeout_seconds` @@ -463,6 +575,55 @@ option when authenticating to the Elasticsearch server. If set to an empty string authentication will be disabled. +[id="plugins-{type}s-{plugin}-deprecated-options"] +==== Elasticsearch Input deprecated configuration options + +This plugin supports the following deprecated configurations. + +WARNING: Deprecated options are subject to removal in future releases. + +[cols="<,<,<",options="header",] +|======================================================================= +|Setting|Input type|Replaced by +| <> |a valid filesystem path|<> +| <> |<>|<> +| <> |<>|<> +|======================================================================= + +[id="plugins-{type}s-{plugin}-ca_file"] +===== `ca_file` +deprecated[4.17.0, Replaced by <>] + +* Value type is <> +* There is no default value for this setting. + +SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary. + +[id="plugins-{type}s-{plugin}-ssl"] +===== `ssl` +deprecated[4.17.0, Replaced by <>] + +* Value type is <> +* Default value is `false` + +If enabled, SSL will be used when communicating with the Elasticsearch +server (i.e. HTTPS will be used instead of plain HTTP). 
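As a hedged before/after sketch of the migration path (the host and CA path below are placeholders), the deprecated options map onto their replacements roughly like this:

[source,ruby]
----
# before: deprecated settings
input {
  elasticsearch {
    hosts   => ["es.example.com:9200"]
    ssl     => true
    ca_file => "/path/to/ca.pem"
  }
}

# after: settings introduced in 4.17.0
input {
  elasticsearch {
    hosts                       => ["es.example.com:9200"]
    ssl_enabled                 => true
    ssl_certificate_authorities => ["/path/to/ca.pem"]
  }
}
----

Note that `ssl_certificate_authorities` accepts a list, which is why the single `ca_file` value is wrapped in an array.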
+ + +[id="plugins-{type}s-{plugin}-ssl_certificate_verification"] +===== `ssl_certificate_verification` +deprecated[4.17.0, Replaced by <>] + +* Value type is <> +* Default value is `true` + +Option to validate the server's certificate. Disabling this severely compromises security. +When certificate validation is disabled, this plugin implicitly trusts the machine +resolved at the given address without validating its proof-of-identity. +In this scenario, the plugin can transmit credentials to or process data from an untrustworthy +man-in-the-middle or other compromised infrastructure. +More information on the importance of certificate verification: +**https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf**. [id="plugins-{type}s-{plugin}-common-options"] include::{include_path}/{type}.asciidoc[] diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 7edcf727..1f3251ee 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -9,6 +9,7 @@ require 'logstash/plugin_mixins/ecs_compatibility_support/target_check' require 'logstash/plugin_mixins/ca_trusted_fingerprint_support' require "logstash/plugin_mixins/scheduler" +require "logstash/plugin_mixins/normalize_config_support" require "base64" require 'logstash/helpers/loggable_try' @@ -82,6 +83,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base include LogStash::PluginMixins::Scheduler + include LogStash::PluginMixins::NormalizeConfigSupport + config_name "elasticsearch" # List of elasticsearch hosts to use for querying. @@ -185,15 +188,60 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :proxy, :validate => :uri_or_empty # SSL - config :ssl, :validate => :boolean, :default => false + config :ssl, :validate => :boolean, :default => false, :deprecated => "Set 'ssl_enabled' instead." + + # SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary + config :ca_file, :validate => :path, :deprecated => "Set 'ssl_certificate_authorities' instead." - # SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary - config :ca_file, :validate => :path + # OpenSSL-style X.509 certificate to authenticate the client + config :ssl_certificate, :validate => :path + + # SSL Certificate Authority files in PEM encoded format, must also include any chain certificates as necessary + config :ssl_certificate_authorities, :validate => :path, :list => true # Option to validate the server's certificate. Disabling this severely compromises security. # For more information on the importance of certificate verification please read # https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf - config :ssl_certificate_verification, :validate => :boolean, :default => true + config :ssl_certificate_verification, :validate => :boolean, :default => true, :deprecated => "Set 'ssl_verification_mode' instead." + + # The list of cipher suites to use, listed by priorities. + # Supported cipher suites vary depending on which version of Java is used. + config :ssl_cipher_suites, :validate => :string, :list => true + + # SSL + config :ssl_enabled, :validate => :boolean + + # OpenSSL-style RSA private key to authenticate the client + config :ssl_key, :validate => :path + + # Set the keystore password + config :ssl_keystore_password, :validate => :password + + # The keystore used to present a certificate to the server. 
+ # It can be either .jks or .p12 + config :ssl_keystore_path, :validate => :path + + # The format of the keystore file. It must be either jks or pkcs12 + config :ssl_keystore_type, :validate => %w[pkcs12 jks] + + # Supported protocols with versions. + config :ssl_supported_protocols, :validate => %w[TLSv1.1 TLSv1.2 TLSv1.3], :default => [], :list => true + + # Set the truststore password + config :ssl_truststore_password, :validate => :password + + # The JKS truststore to validate the server's certificate. + # Use either `:ssl_truststore_path` or `:ssl_certificate_authorities` + config :ssl_truststore_path, :validate => :path + + # The format of the truststore file. It must be either jks or pkcs12 + config :ssl_truststore_type, :validate => %w[pkcs12 jks] + + # Options to verify the server's certificate. + # "full": validates that the provided certificate has an issue date that’s within the not_before and not_after dates; + # chains to a trusted Certificate Authority (CA); has a hostname or IP address that matches the names within the certificate. + # "none": performs no certificate validation. Disabling this severely compromises security (https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf) + config :ssl_verification_mode, :validate => %w[full none], :default => 'full' # Schedule of when to periodically run statement, in Cron format # for example: "* * * * *" (execute query every minute, on the minute) @@ -219,6 +267,9 @@ def initialize(params={}) def register require "rufus/scheduler" + fill_hosts_from_cloud_id + setup_ssl_params! + @options = { :index => @index, :scroll => @scroll, @@ -234,8 +285,6 @@ def register validate_authentication fill_user_password_from_cloud_auth - fill_hosts_from_cloud_id - transport_options = {:headers => {}} transport_options[:headers].merge!(setup_basic_auth(user, password)) @@ -246,7 +295,7 @@ def register transport_options[:socket_timeout] = @socket_timeout_seconds unless @socket_timeout_seconds.nil? hosts = setup_hosts - ssl_options = setup_ssl + ssl_options = setup_client_ssl @logger.warn "Supplied proxy setting (proxy => '') has no effect" if @proxy.eql?('') @@ -416,6 +465,15 @@ def hosts_default?(hosts) hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? ) end + def effectively_ssl? + return true if @ssl_enabled + + hosts = Array(@hosts) + return false if hosts.nil? || hosts.empty? + + hosts.all? 
{ |host| host && host.to_s.start_with?("https") } + end + def validate_authentication authn_options = 0 authn_options += 1 if @cloud_auth @@ -426,26 +484,113 @@ def validate_authentication raise LogStash::ConfigurationError, 'Multiple authentication options are specified, please only use one of user/password, cloud_auth or api_key' end - if @api_key && @api_key.value && @ssl != true - raise(LogStash::ConfigurationError, "Using api_key authentication requires SSL/TLS secured communication using the `ssl => true` option") + if @api_key && @api_key.value && @ssl_enabled != true + raise(LogStash::ConfigurationError, "Using api_key authentication requires SSL/TLS secured communication using the `ssl_enabled => true` option") end end - def setup_ssl + def setup_client_ssl ssl_options = {} + ssl_options[:ssl] = true if @ssl_enabled - ssl_options[:ssl] = true if @ssl - ssl_options[:ca_file] = @ca_file if @ssl && @ca_file - ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint - if @ssl && !@ssl_certificate_verification - logger.warn "You have enabled encryption but DISABLED certificate verification, " + - "to make sure your data is secure remove `ssl_certificate_verification => false`" - ssl_options[:verify] = :disable + unless @ssl_enabled + # Keep it backward compatible with the deprecated `ssl` option + ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint if original_params.include?('ssl') + return ssl_options + end + + ssl_certificate_authorities, ssl_truststore_path, ssl_certificate, ssl_keystore_path = params.values_at('ssl_certificate_authorities', 'ssl_truststore_path', 'ssl_certificate', 'ssl_keystore_path') + + if ssl_certificate_authorities && ssl_truststore_path + raise LogStash::ConfigurationError, 'Use either "ssl_certificate_authorities/ca_file" or "ssl_truststore_path" when configuring the CA certificate' + end + + if ssl_certificate && ssl_keystore_path + raise LogStash::ConfigurationError, 'Use either "ssl_certificate" or "ssl_keystore_path/keystore" when configuring client certificates' end + if ssl_certificate_authorities&.any? + raise LogStash::ConfigurationError, 'Multiple values on "ssl_certificate_authorities" are not supported by this plugin' if ssl_certificate_authorities.size > 1 + ssl_options[:ca_file] = ssl_certificate_authorities.first + end + + if ssl_truststore_path + ssl_options[:truststore] = ssl_truststore_path + ssl_options[:truststore_type] = params["ssl_truststore_type"] if params.include?("ssl_truststore_type") + ssl_options[:truststore_password] = params["ssl_truststore_password"].value if params.include?("ssl_truststore_password") + end + + if ssl_keystore_path + ssl_options[:keystore] = ssl_keystore_path + ssl_options[:keystore_type] = params["ssl_keystore_type"] if params.include?("ssl_keystore_type") + ssl_options[:keystore_password] = params["ssl_keystore_password"].value if params.include?("ssl_keystore_password") + end + + ssl_key = params["ssl_key"] + if ssl_certificate + raise LogStash::ConfigurationError, 'Using an "ssl_certificate" requires an "ssl_key"' unless ssl_key + ssl_options[:client_cert] = ssl_certificate + ssl_options[:client_key] = ssl_key + elsif !ssl_key.nil? + raise LogStash::ConfigurationError, 'An "ssl_certificate" is required when using an "ssl_key"' + end + + ssl_verification_mode = params["ssl_verification_mode"] + unless ssl_verification_mode.nil? 
+ case ssl_verification_mode + when 'none' + logger.warn "You have enabled encryption but DISABLED certificate verification, " + + "to make sure your data is secure set `ssl_verification_mode => full`" + ssl_options[:verify] = :disable + else + ssl_options[:verify] = :strict + end + end + + ssl_options[:cipher_suites] = params["ssl_cipher_suites"] if params.include?("ssl_cipher_suites") + + protocols = params['ssl_supported_protocols'] + ssl_options[:protocols] = protocols if protocols&.any? + ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint + + ssl_options end + + def setup_ssl_params! + @ssl_enabled = normalize_config(:ssl_enabled) do |normalize| + normalize.with_deprecated_alias(:ssl) + end + + # Infer the value if neither the deprecated `ssl` nor the new `ssl_enabled` was set + infer_ssl_enabled_from_hosts + + @ssl_certificate_authorities = normalize_config(:ssl_certificate_authorities) do |normalize| + normalize.with_deprecated_mapping(:ca_file) do |ca_file| + [ca_file] + end + end + + @ssl_verification_mode = normalize_config(:ssl_verification_mode) do |normalize| + normalize.with_deprecated_mapping(:ssl_certificate_verification) do |ssl_certificate_verification| + if ssl_certificate_verification == true + "full" + else + "none" + end + end + end + + params['ssl_enabled'] = @ssl_enabled + params['ssl_certificate_authorities'] = @ssl_certificate_authorities unless @ssl_certificate_authorities.nil? + params['ssl_verification_mode'] = @ssl_verification_mode unless @ssl_verification_mode.nil? + end + + def infer_ssl_enabled_from_hosts + return if original_params.include?('ssl') || original_params.include?('ssl_enabled') + + @ssl_enabled = params['ssl_enabled'] = effectively_ssl? + end + def setup_hosts @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s @hosts.map do |h| @@ -453,7 +598,7 @@ def setup_hosts h else host, port = h.split(':') - { host: host, port: port, scheme: (@ssl ? 'https' : 'http') } + { host: host, port: port, scheme: (@ssl_enabled ? 'https' : 'http') } end end end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index b7aeefb0..5a98d2f0 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.16.0' + s.version = '4.17.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -28,6 +28,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'elasticsearch', '>= 7.17.1' s.add_runtime_dependency 'logstash-mixin-ca_trusted_fingerprint_support', '~> 1.0' + s.add_runtime_dependency 'logstash-mixin-normalize_config_support', '~>1.0' s.add_runtime_dependency 'tzinfo' s.add_runtime_dependency 'tzinfo-data' diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 8004bd83..1aa31410 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -681,7 +681,7 @@ def synchronize_method!(object, method_name) end context "with ssl" do - let(:config) { super().merge({ 'api_key' => LogStash::Util::Password.new('foo:bar'), "ssl" => true }) } + let(:config) { super().merge({ 'api_key' => LogStash::Util::Password.new('foo:bar'), "ssl_enabled" => true }) } it "should set authorization" do plugin.register @@ -700,9 +700,9 @@ def synchronize_method!(object, method_name) end context 'ssl verification disabled' do - let(:config) { super().merge({ 'ssl_certificate_verification' => false }) } + let(:config) { super().merge({ 'ssl_verification_mode' => 'none' }) } it 'should warn data security risk' do - expect(plugin.logger).to receive(:warn).once.with("You have enabled encryption but DISABLED certificate verification, to make sure your data is secure remove `ssl_certificate_verification => false`") + expect(plugin.logger).to receive(:warn).once.with("You have enabled encryption but DISABLED certificate verification, to make sure your data is secure set `ssl_verification_mode => full`") plugin.register end end diff --git a/spec/inputs/elasticsearch_ssl_spec.rb b/spec/inputs/elasticsearch_ssl_spec.rb new file mode 100644 index 00000000..2189fd82 --- /dev/null +++ b/spec/inputs/elasticsearch_ssl_spec.rb @@ -0,0 +1,265 @@ +require 'stud/temporary' +require "elasticsearch" + +describe "SSL options" do + let(:es_client_double) { double("Elasticsearch::Client #{self.inspect}") } + let(:hosts) {["localhost"]} + let(:settings) { { "ssl_enabled" => true, "hosts" => hosts } } + + subject do + require "logstash/inputs/elasticsearch" + LogStash::Inputs::Elasticsearch.new(settings) + end + + before do + allow(es_client_double).to receive(:close) + allow(es_client_double).to receive(:ping).with(any_args).and_return(double("pong").as_null_object) + allow(Elasticsearch::Client).to receive(:new).and_return(es_client_double) + end + + after do + subject.close + end + + context "when ssl_enabled is" do + context "true and there is no https hosts" do + let(:hosts) { %w[http://es01 http://es01] } + + it "should not infer the ssl_enabled value" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(true) + expect(subject.params).to match hash_including("ssl_enabled" => true) + end + end + + context "false and cloud_id resolved host is https" do + let(:settings) {{ + "ssl_enabled" => false, + "hosts" => [], + "cloud_id" => "sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==" + }} + + it "should not infer the ssl_enabled value" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(false) + expect(subject.params).to match hash_including("ssl_enabled" => false) + end + end + end + + context "when neither ssl nor ssl_enabled is set" do + let(:settings) { super().reject { |k| %w[ssl ssl_enabled].include?(k) } } + + context "and there is no https hosts" do + 
let(:hosts) { %w[http://es01 http://es01] } + + it "should infer the ssl_enabled value to false" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(false) + expect(subject.params).to match hash_including("ssl_enabled" => false) + end + end + + context "and there is https hosts" do + let(:hosts) { %w[https://sec-es01 https://sec-es01] } + + it "should infer the ssl_enabled value to true" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(true) + expect(subject.params).to match hash_including("ssl_enabled" => true) + end + end + + context "and hosts have no scheme defined" do + let(:hosts) { %w[es01 es01] } + + it "should infer the ssl_enabled value to false" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(false) + expect(subject.params).to match hash_including("ssl_enabled" => false) + end + end + + context "and cloud_id resolved host is https" do + let(:settings) {{ + "hosts" => [], + "cloud_id" => "sample:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvJGFjMzFlYmI5MDI0MTc3MzE1NzA0M2MzNGZkMjZmZDQ2OjkyNDMkYTRjMDYyMzBlNDhjOGZjZTdiZTg4YTA3NGEzYmIzZTA6OTI0NA==" + }} + + it "should infer the ssl_enabled value to true" do + subject.register + expect(subject.instance_variable_get(:@ssl_enabled)).to eql(true) + expect(subject.params).to match hash_including("ssl_enabled" => true) + end + end + end + + context "when ssl_verification_mode" do + context "is set to none" do + let(:settings) { super().merge( + "ssl_verification_mode" => "none", + ) } + + it "should print a warning" do + expect(subject.logger).to receive(:warn).with(/You have enabled encryption but DISABLED certificate verification/).at_least(:once) + allow(subject.logger).to receive(:warn).with(any_args) + + subject.register + end + + it "should pass the flag to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including(:ssl => true, :verify => :disable) + end.and_return(es_client_double) + + subject.register + end + end + + context "is set to full" do + let(:settings) { super().merge( + "ssl_verification_mode" => 'full', + ) } + + it "should pass the flag to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including(:ssl => true, :verify => :strict) + end.and_return(es_client_double) + + subject.register + end + end + end + + context "with the conflicting configs" do + context "ssl_certificate_authorities and ssl_truststore_path set" do + let(:ssl_truststore_path) { Stud::Temporary.file.path } + let(:ssl_certificate_authorities_path) { Stud::Temporary.file.path } + let(:settings) { super().merge( + "ssl_truststore_path" => ssl_truststore_path, + "ssl_certificate_authorities" => ssl_certificate_authorities_path + ) } + + after :each do + File.delete(ssl_truststore_path) + File.delete(ssl_certificate_authorities_path) + end + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Use either "ssl_certificate_authorities\/ca_file" or "ssl_truststore_path"/) + end + end + + context "ssl_certificate and ssl_keystore_path set" do + let(:ssl_keystore_path) { Stud::Temporary.file.path } + let(:ssl_certificate_path) { Stud::Temporary.file.path } + let(:settings) { super().merge( + "ssl_certificate" => ssl_certificate_path, + "ssl_keystore_path" => ssl_keystore_path + ) } + + after :each do + File.delete(ssl_keystore_path) + File.delete(ssl_certificate_path) + end 
+ + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Use either "ssl_certificate" or "ssl_keystore_path\/keystore"/) + end + end + end + + context "when configured with Java store files" do + let(:ssl_truststore_path) { Stud::Temporary.file.path } + let(:ssl_keystore_path) { Stud::Temporary.file.path } + + after :each do + File.delete(ssl_truststore_path) + File.delete(ssl_keystore_path) + end + + let(:settings) { super().merge( + "ssl_truststore_path" => ssl_truststore_path, + "ssl_truststore_type" => "jks", + "ssl_truststore_password" => "foo", + "ssl_keystore_path" => ssl_keystore_path, + "ssl_keystore_type" => "jks", + "ssl_keystore_password" => "bar", + "ssl_verification_mode" => "full", + "ssl_cipher_suites" => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + "ssl_supported_protocols" => ["TLSv1.3"] + ) } + + it "should pass the parameters to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including( + :ssl => true, + :keystore => ssl_keystore_path, + :keystore_type => "jks", + :keystore_password => "bar", + :truststore => ssl_truststore_path, + :truststore_type => "jks", + :truststore_password => "foo", + :verify => :strict, + :cipher_suites => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + :protocols => ["TLSv1.3"], + ) + end.and_return(es_client_double) + + subject.register + end + end + + context "when configured with certificate files" do + let(:ssl_certificate_authorities_path) { Stud::Temporary.file.path } + let(:ssl_certificate_path) { Stud::Temporary.file.path } + let(:ssl_key_path) { Stud::Temporary.file.path } + let(:settings) { super().merge( + "ssl_certificate_authorities" => [ssl_certificate_authorities_path], + "ssl_certificate" => ssl_certificate_path, + "ssl_key" => ssl_key_path, + "ssl_verification_mode" => "full", + "ssl_cipher_suites" => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + "ssl_supported_protocols" => ["TLSv1.3"] + ) } + + after :each do + File.delete(ssl_certificate_authorities_path) + File.delete(ssl_certificate_path) + File.delete(ssl_key_path) + end + + it "should pass the parameters to the ES client" do + expect(::Elasticsearch::Client).to receive(:new) do |args| + expect(args[:ssl]).to match hash_including( + :ssl => true, + :ca_file => ssl_certificate_authorities_path, + :client_cert => ssl_certificate_path, + :client_key => ssl_key_path, + :verify => :strict, + :cipher_suites => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], + :protocols => ["TLSv1.3"], + ) + end.and_return(es_client_double) + + subject.register + end + + context "and only the ssl_certificate is set" do + let(:settings) { super().reject { |k| "ssl_key".eql?(k) } } + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Using an "ssl_certificate" requires an "ssl_key"/) + end + end + + context "and only the ssl_key is set" do + let(:settings) { super().reject { |k| "ssl_certificate".eql?(k) } } + + it "should raise a configuration error" do + expect { subject.register }.to raise_error(LogStash::ConfigurationError, /An "ssl_certificate" is required when using an "ssl_key"/) + end + end + end +end + diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index 0b9aee02..e9c02fa9 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -95,13 +95,13 @@ end context 'with ca_file' do - let(:config) 
{ super().merge('ssl' => true, 'ca_file' => ca_file) } + let(:config) { super().merge('ssl_enabled' => true, 'ssl_certificate_authorities' => ca_file) } it_behaves_like 'secured_elasticsearch' end context 'with `ca_trusted_fingerprint`' do let(:ca_trusted_fingerprint) { File.read("spec/fixtures/test_certs/ca.der.sha256").chomp } - let(:config) { super().merge('ssl' => true, 'ca_trusted_fingerprint' => ca_trusted_fingerprint) } + let(:config) { super().merge('ssl_enabled' => true, 'ca_trusted_fingerprint' => ca_trusted_fingerprint) } if Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create("8.3.0") it_behaves_like 'secured_elasticsearch' @@ -125,11 +125,11 @@ context 'setting host:port (and ssl)', secure_integration: true do - let(:client_options) { { :ca_file => ca_file, :user => user, :password => password } } + let(:client_options) { { :ssl_certificate_authorities => ca_file, :user => user, :password => password } } let(:config) do config = super().merge "hosts" => [ESHelper.get_host_port] - config.merge('user' => user, 'password' => password, 'ssl' => true, 'ca_file' => ca_file) + config.merge('user' => user, 'password' => password, 'ssl_enabled' => true, 'ssl_certificate_authorities' => ca_file) end it_behaves_like 'an elasticsearch index plugin' From 3f3f129c871b194f51fe833258f5a232ac73a7ad Mon Sep 17 00:00:00 2001 From: kaisecheng <69120390+kaisecheng@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:28:06 +0100 Subject: [PATCH 154/207] fix scroll slice memory (#189) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit successful slices are pushed to the queue immediately without waiting for the other slices Fixed:#188 Co-authored-by: João Duarte --- CHANGELOG.md | 3 ++ docs/index.asciidoc | 1 + lib/logstash/inputs/elasticsearch.rb | 54 +++++++++------------------- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 35 ++++++++++-------- 5 files changed, 41 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc7c7638..23ea617b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.17.1 + - Fix: scroll slice high memory consumption [#189](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/189) + ## 4.17.0 - Added SSL settings for: [#185](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/185) - `ssl_enabled`: Enable/disable the SSL settings. If not provided, the value is inferred from the hosts scheme diff --git a/docs/index.asciidoc b/docs/index.asciidoc index c44ab493..f65d6e72 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -353,6 +353,7 @@ documents and/or the <> has been specified as a l The number of times to re-run the query after the first failure. If the query fails after all retries, it logs an error message. The default is 0 (no retry). This value should be equal to or greater than zero. +NOTE: Partial failures - such as errors in a subset of all slices - can result in the entire query being retried, which can lead to duplication of data. Avoiding this would require Logstash to store the entire result set of a query in memory which is often not possible. 
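As an illustration only (the host and query below are placeholders, not part of this patch), a query that tolerates transient failures while accepting the duplication caveat above might be configured as:

[source,ruby]
----
input {
  elasticsearch {
    hosts   => ["localhost:9200"]
    query   => '{ "query": { "match": { "level": "error" } } }'
    retries => 2  # the whole query may re-run on partial failure, so downstream consumers should tolerate duplicates
  }
}
----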
[id="plugins-{type}s-{plugin}-schedule"] ===== `schedule` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 1f3251ee..9d4ff777 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -256,6 +256,8 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # config :ca_trusted_fingerprint, :validate => :sha_256_hex include LogStash::PluginMixins::CATrustedFingerprintSupport + attr_reader :pipeline_id + def initialize(params={}) super(params) @@ -267,6 +269,8 @@ def initialize(params={}) def register require "rufus/scheduler" + @pipeline_id = execution_context&.pipeline_id || 'main' + fill_hosts_from_cloud_id setup_ssl_params! @@ -326,20 +330,22 @@ def run(output_queue) def do_run(output_queue) # if configured to run a single slice, don't bother spinning up threads if @slices.nil? || @slices <= 1 - success, events = retryable_slice - success && events.each { |event| output_queue << event } - return + return retryable(JOB_NAME) do + do_run_slice(output_queue) + end end logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 - slice_results = parallel_slice # array of tuple(ok, events) - # insert events to queue if all slices success - if slice_results.all?(&:first) - slice_results.flat_map { |success, events| events } - .each { |event| output_queue << event } - end + @slices.times.map do |slice_id| + Thread.new do + LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}") + retryable(JOB_NAME) do + do_run_slice(output_queue, slice_id) + end + end + end.map(&:join) logger.trace("#{@slices} slices completed") end @@ -347,42 +353,14 @@ def do_run(output_queue) def retryable(job_name, &block) begin stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) - output = stud_try.try((@retries + 1).times) { yield } - [true, output] + stud_try.try((@retries + 1).times) { yield } rescue => e error_details = {:message => e.message, :cause => e.cause} error_details[:backtrace] = e.backtrace if logger.debug? logger.error("Tried #{job_name} unsuccessfully", error_details) - [false, nil] end end - - # @return [(ok, events)] : Array of tuple(Boolean, [Logstash::Event]) - def parallel_slice - pipeline_id = execution_context&.pipeline_id || 'main' - @slices.times.map do |slice_id| - Thread.new do - LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}") - retryable_slice(slice_id) - end - end.map do |t| - t.join - t.value - end - end - - # @param scroll_id [Integer] - # @return (ok, events) [Boolean, Array(Logstash::Event)] - def retryable_slice(slice_id=nil) - retryable(JOB_NAME) do - output = [] - do_run_slice(output, slice_id) - output - end - end - - def do_run_slice(output_queue, slice_id=nil) slice_query = @base_query slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil? 
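The threading change above boils down to a small pattern: every slice gets its own thread and streams events straight into the shared queue, instead of collecting per-slice arrays and flushing them only when all slices succeed. A self-contained sketch of that pattern follows; `fetch_pages_for` is a hypothetical stand-in for the plugin's per-slice scroll loop, not an actual plugin method:

[source,ruby]
----
require "thread"

# hypothetical stand-in for the per-slice scroll loop
def fetch_pages_for(slice_id)
  3.times { |page| yield({ "slice" => slice_id, "page" => page }) }
end

output_queue = Queue.new # thread-safe sink shared by every slice

threads = 2.times.map do |slice_id|
  Thread.new do
    # each page is pushed as soon as it arrives, so one slow or failing
    # slice no longer forces the other slices' results to sit in memory
    fetch_pages_for(slice_id) { |event| output_queue << event }
  end
end
threads.each(&:join)

puts output_queue.size # => 6
----

Under the previous design, `parallel_slice` collected per-slice `(ok, events)` tuples and only enqueued them once every slice had succeeded, which is exactly the buffering this commit removes.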
diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 5a98d2f0..b5de0302 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.17.0' + s.version = '4.17.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 1aa31410..1921196c 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -204,7 +204,7 @@ context 'with `slices => 1`' do let(:slices) { 1 } it 'runs just one slice' do - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), nil) + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) expect(Thread).to_not receive(:new) plugin.register @@ -215,7 +215,7 @@ context 'without slices directive' do let(:config) { super().tap { |h| h.delete('slices') } } it 'runs just one slice' do - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), nil) + expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) expect(Thread).to_not receive(:new) plugin.register @@ -414,18 +414,19 @@ def synchronize_method!(object, method_name) expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) plugin.register - expect(client).to receive(:clear_scroll).and_return(nil) + expect(client).to receive(:clear_scroll).twice.and_return(nil) - # SLICE0 is a three-page scroll in which the second page throws an exception + # SLICE0 is a three-page scroll slice0_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 0, 'max' => 2})) expect(client).to receive(:search).with(hash_including(:body => slice0_query)).and_return(slice0_response0) - expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_raise("boom") + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll1 })).and_return(slice0_response1) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice0_scroll2 })).and_return(slice0_response2) allow(client).to receive(:ping) - # SLICE1 is a two-page scroll in which the last page has no next scroll id + # SLICE1 is a two-page scroll in which the last page throws an exception slice1_query = LogStash::Json.dump(query.merge('slice' => { 'id' => 1, 'max' => 2})) expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) - expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) + expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom") synchronize_method!(plugin, :scroll_request) synchronize_method!(plugin, :search_request) @@ -433,18 +434,22 @@ def synchronize_method!(object, method_name) let(:client) { Elasticsearch::Client.new } - it 'does not insert events into the queue' do - expect(plugin).to receive(:parallel_slice).and_wrap_original do |m, *args| - slice0, slice1 = m.call - expect(slice0[0]).to be_falsey - expect(slice1[0]).to be_truthy - expect(slice1[1].size).to eq(4) # four items from SLICE1 - [slice0, slice1] + it 'inserts events into the queue 
without waiting for other slices' do + expect(plugin).to receive(:do_run_slice).twice.and_wrap_original do |m, *args| + q = args[0] + slice_id = args[1] + if slice_id == 0 + m.call(*args) + expect(q.size).to eq(3) + else + sleep(1) + m.call(*args) + end end queue = Queue.new plugin.run(queue) - expect(queue.size).to eq(0) + expect(queue.size).to eq(5) end end end From 903bf7926077e653da43f6f2374438fa4d3fd442 Mon Sep 17 00:00:00 2001 From: Andrea Selva Date: Wed, 31 May 2023 17:24:35 +0200 Subject: [PATCH 155/207] Tag integration tests properly (#191) Tagged integration tests properly to avoid failures if ES is not launched locally --- spec/inputs/integration/elasticsearch_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index e9c02fa9..ca11e42e 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -4,7 +4,7 @@ require "logstash/inputs/elasticsearch" require_relative "../../../spec/es_helper" -describe LogStash::Inputs::Elasticsearch do +describe LogStash::Inputs::Elasticsearch, :integration => true do SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' From f96dad3c80eb27dfd9a76fc5bb555946a7375f2a Mon Sep 17 00:00:00 2001 From: Edmo Vamerlatti Costa <11836452+edmocosta@users.noreply.github.com> Date: Fri, 2 Jun 2023 18:01:09 +0200 Subject: [PATCH 156/207] fix: restore use of DefaultHostnameVerifier (#193) * Document why we are using :default instead of :strict Co-authored-by: Ry Biesemeyer --- CHANGELOG.md | 3 +++ lib/logstash/inputs/elasticsearch.rb | 4 +++- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_ssl_spec.rb | 6 +++--- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23ea617b..897f63c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.17.2 + - Fixes a regression introduced in 4.17.0 which could prevent a connection from being established to Elasticsearch in some SSL configurations [#193](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/193) + ## 4.17.1 - Fix: scroll slice high memory consumption [#189](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/189) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 9d4ff777..8c933ea6 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -521,7 +521,9 @@ def setup_client_ssl "to make sure your data is secure set `ssl_verification_mode => full`" ssl_options[:verify] = :disable else - ssl_options[:verify] = :strict + # Manticore's :default maps to Apache HTTP Client's DefaultHostnameVerifier, + # which is the modern STRICT verifier that replaces the deprecated StrictHostnameVerifier + ssl_options[:verify] = :default end end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index b5de0302..86d29f74 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.17.1' + s.version = '4.17.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_ssl_spec.rb b/spec/inputs/elasticsearch_ssl_spec.rb index 2189fd82..4ef3ac28 100644 --- a/spec/inputs/elasticsearch_ssl_spec.rb +++ b/spec/inputs/elasticsearch_ssl_spec.rb @@ -123,7 +123,7 @@ it "should pass the flag to the ES client" do expect(::Elasticsearch::Client).to receive(:new) do |args| - expect(args[:ssl]).to match hash_including(:ssl => true, :verify => :strict) + expect(args[:ssl]).to match hash_including(:ssl => true, :verify => :default) end.and_return(es_client_double) subject.register @@ -200,7 +200,7 @@ :truststore => ssl_truststore_path, :truststore_type => "jks", :truststore_password => "foo", - :verify => :strict, + :verify => :default, :cipher_suites => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], :protocols => ["TLSv1.3"], ) @@ -236,7 +236,7 @@ :ca_file => ssl_certificate_authorities_path, :client_cert => ssl_certificate_path, :client_key => ssl_key_path, - :verify => :strict, + :verify => :default, :cipher_suites => ["TLS_DHE_RSA_WITH_AES_256_CBC_SHA256"], :protocols => ["TLSv1.3"], ) From f7113486661f605f6e7421ccebe40f088387b1b0 Mon Sep 17 00:00:00 2001 From: kaisecheng <69120390+kaisecheng@users.noreply.github.com> Date: Fri, 29 Sep 2023 15:11:51 +0100 Subject: [PATCH 157/207] Add Elastic Api Version header (#195) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit checks if Elasticsearch is Serverless and then recreates the rest client with the default header {"Elastic-Api-Version": "2023-10-31"} Co-authored-by: João Duarte --- CHANGELOG.md | 3 ++ lib/logstash/inputs/elasticsearch.rb | 36 ++++++++++++++++++-- logstash-input-elasticsearch.gemspec | 4 +-- spec/inputs/elasticsearch_spec.rb | 48 +++++++++++++++++++++++++-- spec/inputs/elasticsearch_ssl_spec.rb | 2 ++ 5 files changed, 86 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 897f63c4..96429222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.18.0 + - Added request header `Elastic-Api-Version` for serverless [#195](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/195) + ## 4.17.2 - Fixes a regression introduced in 4.17.0 which could prevent a connection from being established to Elasticsearch in some SSL configurations [#193](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/193) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 8c933ea6..05014485 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -258,6 +258,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base attr_reader :pipeline_id + BUILD_FLAVOR_SERVERLESS = 'serverless'.freeze + DEFAULT_EAV_HEADER = { "Elastic-Api-Version" => "2023-10-31" }.freeze + def initialize(params={}) super(params) @@ -305,13 +308,19 @@ def register transport_options[:proxy] = @proxy.to_s if @proxy && !@proxy.eql?('') - @client = Elasticsearch::Client.new( + @client_options = { :hosts => hosts, :transport_options => transport_options, :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore, :ssl => ssl_options - ) + } + + @client = Elasticsearch::Client.new(@client_options) + test_connection! 
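+    # serverless clusters require the Elastic-Api-Version request header;
+    # setup_serverless below detects the build flavor and re-creates the
+    # client with that header when needed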
+ + setup_serverless + @client end @@ -435,7 +444,7 @@ def scroll_request(scroll_id) @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll) end - def search_request(options) + def search_request(options={}) @client.search(options) end @@ -668,6 +677,27 @@ def test_connection! raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch" end + # recreate client with default header when it is serverless + # verify the header by sending GET / + def setup_serverless + if serverless? + @client_options[:transport_options][:headers].merge!(DEFAULT_EAV_HEADER) + @client = Elasticsearch::Client.new(@client_options) + @client.info + end + rescue => e + @logger.error("Failed to retrieve Elasticsearch info", message: e.message, exception: e.class, backtrace: e.backtrace) + raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch" + end + + def build_flavor + @build_flavor ||= @client.info&.dig('version', 'build_flavor') + end + + def serverless? + @is_serverless ||= (build_flavor == BUILD_FLAVOR_SERVERLESS) + end + module URIOrEmptyValidator ## # @override to provide :uri_or_empty validator diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 86d29f74..8d4ee0c6 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.17.2' + s.version = '4.18.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" @@ -26,7 +26,7 @@ Gem::Specification.new do |s| s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0' s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0' - s.add_runtime_dependency 'elasticsearch', '>= 7.17.1' + s.add_runtime_dependency 'elasticsearch', '>= 7.17.9' s.add_runtime_dependency 'logstash-mixin-ca_trusted_fingerprint_support', '~> 1.0' s.add_runtime_dependency 'logstash-mixin-normalize_config_support', '~>1.0' diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 1921196c..d0b25f71 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -17,9 +17,12 @@ let(:plugin) { described_class.new(config) } let(:queue) { Queue.new } + let(:build_flavor) { "default" } + let(:cluster_info) { {"version" => {"number" => "7.5.0", "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} } before(:each) do Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method + allow_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(cluster_info) end let(:base_config) do @@ -39,7 +42,13 @@ context "against authentic Elasticsearch" do it "should not raise an exception" do expect { plugin.register }.to_not raise_error - end + end + + it "does not set header Elastic-Api-Version" do + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).not_to match hash_including("Elastic-Api-Version" => "2023-10-31") + end end context "against not authentic Elasticsearch" do @@ -52,6 +61,37 @@ end end + context "against serverless Elasticsearch" do + before do + allow(plugin).to receive(:test_connection!) 
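+      # force the serverless code path regardless of the stubbed cluster info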
+      allow(plugin).to receive(:serverless?).and_return(true)
+    end
+
+    context "with unsupported header" do
+      let(:es_client) { double("es_client") }

+      before do
+        allow(Elasticsearch::Client).to receive(:new).and_return(es_client)
+        allow(es_client).to receive(:info).and_raise(
+          Elasticsearch::Transport::Transport::Errors::BadRequest.new
+        )
+      end
+
+      it "raises an exception" do
+        expect {plugin.register}.to raise_error(LogStash::ConfigurationError)
+      end
+    end
+
+    context "with supported header" do
+      it "sets default header to rest client" do
+        expect_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(true)
+        plugin.register
+        client = plugin.send(:client)
+        expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("Elastic-Api-Version" => "2023-10-31")
+      end
+    end
+  end
+
   context "retry" do
     let(:config) do
       {
@@ -85,6 +125,7 @@
           allow(@esclient).to receive(:scroll) { { "hits" => { "hits" => [hit] } } }
           allow(@esclient).to receive(:clear_scroll).and_return(nil)
           allow(@esclient).to receive(:ping)
+          allow(@esclient).to receive(:info).and_return(cluster_info)
         end
       end

@@ -869,7 +910,9 @@ def wait_receive_request
     let(:plugin) { described_class.new(config) }
     let(:event) { LogStash::Event.new({}) }

-    it "client should sent the expect user-agent" do
+    # elasticsearch-ruby 7.17.9 initializes two user agent headers, `user-agent` and `User-Agent`
+    # hence, this header size test case fails
+    xit "client should sent the expect user-agent" do
       plugin.register

       queue = []
@@ -916,6 +959,7 @@
           expect(transport_options[manticore_transport_option]).to eq(config_value.to_i)
           mock_client = double("fake_client")
           allow(mock_client).to receive(:ping)
+          allow(mock_client).to receive(:info).and_return(cluster_info)
           mock_client
         end

diff --git a/spec/inputs/elasticsearch_ssl_spec.rb b/spec/inputs/elasticsearch_ssl_spec.rb
index 4ef3ac28..f0a3a671 100644
--- a/spec/inputs/elasticsearch_ssl_spec.rb
+++ b/spec/inputs/elasticsearch_ssl_spec.rb
@@ -14,6 +14,8 @@
   before do
     allow(es_client_double).to receive(:close)
     allow(es_client_double).to receive(:ping).with(any_args).and_return(double("pong").as_null_object)
+    allow(es_client_double).to receive(:info).and_return({"version" => {"number" => "7.5.0", "build_flavor" => "default"},
+                                                          "tagline" => "You Know, for Search"})
     allow(Elasticsearch::Client).to receive(:new).and_return(es_client_double)
   end

From 438152b35f51f34873cd50e7517cfbb3a0879545 Mon Sep 17 00:00:00 2001
From: kaisecheng <69120390+kaisecheng@users.noreply.github.com>
Date: Mon, 13 Nov 2023 16:51:25 +0000
Subject: [PATCH 158/207] Add support for search_after (#198)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit introduces the `search_api` option to enable support for both
search_after and scroll.

`auto`: Default value. It uses `search_after` for Elasticsearch 8 and newer versions, otherwise it falls back to the `scroll` API
`search_after`: Creates a point in time (PIT) for search, which is the recommended way to do paginated search since 7.10
`scroll`: Uses the scroll API to search, which is the previous way to search

When search_after is utilized, if the query doesn't specify a sort field, a default sort of { "sort": { "_shard_doc": "asc" } } will be added to the query. By default, Elasticsearch implicitly adds the sort field "_shard_doc" on top of any existing sort as a tie-breaker. 
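
As a rough sketch, the tie-breaker injection performed by the new SearchAfter
class amounts to this single line (taken verbatim from paginated_search.rb in
this patch):

    # add the default sort only when the user query defines none
    body = body.merge(:sort => {"_shard_doc": "asc"}) if @query&.dig("sort").nil?

so a user-provided `sort` is always left untouched.
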
The scroll search has been refactored and is expected to function the same, with only minor changes made to the logging messages. Co-authored-by: João Duarte --- CHANGELOG.md | 4 + docs/index.asciidoc | 17 ++ lib/logstash/inputs/elasticsearch.rb | 138 ++++------- .../inputs/elasticsearch/paginated_search.rb | 231 ++++++++++++++++++ logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 194 +++++++++------ spec/inputs/integration/elasticsearch_spec.rb | 18 +- spec/inputs/paginated_search_spec.rb | 129 ++++++++++ 8 files changed, 562 insertions(+), 171 deletions(-) create mode 100644 lib/logstash/inputs/elasticsearch/paginated_search.rb create mode 100644 spec/inputs/paginated_search_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 96429222..b79d08e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 4.19.0 + - Added `search_api` option to support `search_after` and `scroll` [#198](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/198) + - The default value `auto` uses `search_after` for Elasticsearch >= 8, otherwise, fall back to `scroll` + ## 4.18.0 - Added request header `Elastic-Api-Version` for serverless [#195](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/195) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index f65d6e72..7dfd5acb 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -118,6 +118,7 @@ This plugin supports the following configuration options plus the <> | <>|No | <> |<>|No | <> |<>|No +| <> |<>, one of `["auto", "search_after", "scroll"]`|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -333,6 +334,9 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`. The query to be executed. Read the {ref}/query-dsl.html[Elasticsearch query DSL documentation] for more information. +When <> resolves to `search_after` and the query does not specify `sort`, +the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter to know more. + [id="plugins-{type}s-{plugin}-request_timeout_seconds"] ===== `request_timeout_seconds` @@ -377,6 +381,19 @@ This parameter controls the keepalive time in seconds of the scrolling request and initiates the scrolling process. The timeout applies per round trip (i.e. between the previous scroll request, to the next). +[id="plugins-{type}s-{plugin}-seearch_api"] +===== `search_api` + +* Value can be any of: `auto`, `search_after`, `scroll` +* Default value is `auto` + +With `auto` the plugin uses the `search_after` parameter for Elasticsearch version `8.0.0` or higher, otherwise the `scroll` API is used instead. + +`search_after` uses {ref}/point-in-time-api.html#point-in-time-api[point in time] and sort value to search. +The query requires at least one `sort` field, as described in the <> parameter. + +`scroll` uses {ref}/paginate-search-results.html#scroll-search-results[scroll] API to search, which is no longer recommended. 
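+
+For example, a minimal sketch of a pipeline that pins the plugin to the
+legacy scroll API (the `hosts` value is illustrative):
+[source,ruby]
+----
+input {
+  elasticsearch {
+    hosts => ["localhost"]
+    search_api => "scroll"
+  }
+}
+----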
+ [id="plugins-{type}s-{plugin}-size"] ===== `size` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 05014485..7f3109c2 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -11,7 +11,6 @@ require "logstash/plugin_mixins/scheduler" require "logstash/plugin_mixins/normalize_config_support" require "base64" -require 'logstash/helpers/loggable_try' require "elasticsearch" require "elasticsearch/transport/transport/http/manticore" @@ -74,6 +73,8 @@ # class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base + require 'logstash/inputs/elasticsearch/paginated_search' + include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1) include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck @@ -106,6 +107,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # The number of retries to run the query. If the query fails after all retries, it logs an error message. config :retries, :validate => :number, :default => 0 + # Default `auto` will use `search_after` api for Elasticsearch 8 and use `scroll` api for 7 + # Set to scroll to fallback to previous version + config :search_api, :validate => %w[auto search_after scroll], :default => "auto" + # This parameter controls the keepalive time in seconds of the scrolling # request and initiates the scrolling process. The timeout applies per # round trip (i.e. between the previous scroll request, to the next). @@ -321,93 +326,21 @@ def register setup_serverless + setup_search_api + @client end def run(output_queue) if @schedule - scheduler.cron(@schedule) { do_run(output_queue) } + scheduler.cron(@schedule) { @paginated_search.do_run(output_queue) } scheduler.join else - do_run(output_queue) - end - end - - private - JOB_NAME = "run query" - def do_run(output_queue) - # if configured to run a single slice, don't bother spinning up threads - if @slices.nil? || @slices <= 1 - return retryable(JOB_NAME) do - do_run_slice(output_queue) - end - end - - logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 - - - @slices.times.map do |slice_id| - Thread.new do - LogStash::Util::set_thread_name("[#{pipeline_id}]|input|elasticsearch|slice_#{slice_id}") - retryable(JOB_NAME) do - do_run_slice(output_queue, slice_id) - end - end - end.map(&:join) - - logger.trace("#{@slices} slices completed") - end - - def retryable(job_name, &block) - begin - stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) - stud_try.try((@retries + 1).times) { yield } - rescue => e - error_details = {:message => e.message, :cause => e.cause} - error_details[:backtrace] = e.backtrace if logger.debug? - logger.error("Tried #{job_name} unsuccessfully", error_details) + @paginated_search.do_run(output_queue) end end - def do_run_slice(output_queue, slice_id=nil) - slice_query = @base_query - slice_query = slice_query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil? - - slice_options = @options.merge(:body => LogStash::Json.dump(slice_query) ) - - logger.info("Slice starting", slice_id: slice_id, slices: @slices) unless slice_id.nil? - - begin - r = search_request(slice_options) - - r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } - logger.debug("Slice progress", slice_id: slice_id, slices: @slices) unless slice_id.nil? - - has_hits = r['hits']['hits'].any? - scroll_id = r['_scroll_id'] - - while has_hits && scroll_id && !stop? 
- has_hits, scroll_id = process_next_scroll(output_queue, scroll_id) - logger.debug("Slice progress", slice_id: slice_id, slices: @slices) if logger.debug? && slice_id - end - logger.info("Slice complete", slice_id: slice_id, slices: @slices) unless slice_id.nil? - ensure - clear_scroll(scroll_id) - end - end - - ## - # @param output_queue [#<<] - # @param scroll_id [String]: a scroll id to resume - # @return [Array(Boolean,String)]: a tuple representing whether the response - # - def process_next_scroll(output_queue, scroll_id) - r = scroll_request(scroll_id) - r['hits']['hits'].each { |hit| push_hit(hit, output_queue) } - [r['hits']['hits'].any?, r['_scroll_id']] - end - def push_hit(hit, output_queue) event = targeted_event_factory.new_event hit['_source'] set_docinfo_fields(hit, event) if @docinfo @@ -433,20 +366,7 @@ def set_docinfo_fields(hit, event) event.set(@docinfo_target, docinfo_target) end - def clear_scroll(scroll_id) - @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id - rescue => e - # ignore & log any clear_scroll errors - logger.warn("Ignoring clear_scroll exception", message: e.message, exception: e.class) - end - - def scroll_request(scroll_id) - @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll) - end - - def search_request(options={}) - @client.search(options) - end + private def hosts_default?(hosts) hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? ) @@ -677,6 +597,18 @@ def test_connection! raise LogStash::ConfigurationError, "Could not connect to a compatible version of Elasticsearch" end + def es_info + @es_info ||= @client.info + end + + def es_version + @es_version ||= es_info&.dig('version', 'number') + end + + def es_major_version + @es_major_version ||= es_version.split('.').first.to_i + end + # recreate client with default header when it is serverless # verify the header by sending GET / def setup_serverless @@ -691,13 +623,35 @@ def setup_serverless end def build_flavor - @build_flavor ||= @client.info&.dig('version', 'build_flavor') + @build_flavor ||= es_info&.dig('version', 'build_flavor') end def serverless? @is_serverless ||= (build_flavor == BUILD_FLAVOR_SERVERLESS) end + def setup_search_api + @resolved_search_api = if @search_api == "auto" + api = if es_major_version >= 8 + "search_after" + else + "scroll" + end + logger.info("`search_api => auto` resolved to `#{api}`", :elasticsearch => es_version) + api + else + @search_api + end + + + @paginated_search = if @resolved_search_api == "search_after" + LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self) + else + logger.warn("scroll API is no longer recommended for pagination. 
Consider using search_after instead.") if es_major_version >= 8 + LogStash::Inputs::Elasticsearch::Scroll.new(@client, self) + end + end + module URIOrEmptyValidator ## # @override to provide :uri_or_empty validator diff --git a/lib/logstash/inputs/elasticsearch/paginated_search.rb b/lib/logstash/inputs/elasticsearch/paginated_search.rb new file mode 100644 index 00000000..2e8236bc --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/paginated_search.rb @@ -0,0 +1,231 @@ +require 'logstash/helpers/loggable_try' + +module LogStash + module Inputs + class Elasticsearch + class PaginatedSearch + include LogStash::Util::Loggable + + def initialize(client, plugin) + @client = client + @plugin_params = plugin.params + + @index = @plugin_params["index"] + @query = LogStash::Json.load(@plugin_params["query"]) + @scroll = @plugin_params["scroll"] + @size = @plugin_params["size"] + @slices = @plugin_params["slices"] + @retries = @plugin_params["retries"] + + @plugin = plugin + @pipeline_id = plugin.pipeline_id + end + + def do_run(output_queue) + return retryable_search(output_queue) if @slices.nil? || @slices <= 1 + + retryable_slice_search(output_queue) + end + + def retryable(job_name, &block) + stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) + stud_try.try((@retries + 1).times) { yield } + rescue => e + error_details = {:message => e.message, :cause => e.cause} + error_details[:backtrace] = e.backtrace if logger.debug? + logger.error("Tried #{job_name} unsuccessfully", error_details) + end + + def retryable_search(output_queue) + raise NotImplementedError + end + + def retryable_slice_search(output_queue) + raise NotImplementedError + end + end + + class Scroll < PaginatedSearch + SCROLL_JOB = "scroll paginated search" + + def search_options(slice_id) + query = @query + query = @query.merge('slice' => { 'id' => slice_id, 'max' => @slices}) unless slice_id.nil? + { + :index => @index, + :scroll => @scroll, + :size => @size, + :body => LogStash::Json.dump(query) + } + end + + def initial_search(slice_id) + options = search_options(slice_id) + @client.search(options) + end + + def next_page(scroll_id) + @client.scroll(:body => { :scroll_id => scroll_id }, :scroll => @scroll) + end + + def process_page(output_queue) + r = yield + r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) } + [r['hits']['hits'].any?, r['_scroll_id']] + end + + def search(output_queue, slice_id=nil) + log_details = {} + log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil? + + logger.info("Query start", log_details) + has_hits, scroll_id = process_page(output_queue) { initial_search(slice_id) } + + while has_hits && scroll_id && !@plugin.stop? 
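+            # keep pulling pages for this scroll until a page comes back empty,
+            # the scroll id disappears, or the plugin is shutting down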
+ logger.debug("Query progress", log_details) + has_hits, scroll_id = process_page(output_queue) { next_page(scroll_id) } + end + + logger.info("Query completed", log_details) + ensure + clear(scroll_id) + end + + def retryable_search(output_queue, slice_id=nil) + retryable(SCROLL_JOB) do + search(output_queue, slice_id) + end + end + + def retryable_slice_search(output_queue) + logger.warn("managed slices for query is very large (#{@slices}); consider reducing") if @slices > 8 + + @slices.times.map do |slice_id| + Thread.new do + LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}") + retryable_search(output_queue, slice_id) + end + end.map(&:join) + + logger.trace("#{@slices} slices completed") + end + + def clear(scroll_id) + @client.clear_scroll(:body => { :scroll_id => scroll_id }) if scroll_id + rescue => e + # ignore & log any clear_scroll errors + logger.debug("Ignoring clear_scroll exception", message: e.message, exception: e.class) + end + end + + class SearchAfter < PaginatedSearch + PIT_JOB = "create point in time (PIT)" + SEARCH_AFTER_JOB = "search_after paginated search" + + def pit?(id) + !!id&.is_a?(String) + end + + def create_pit + logger.info("Create point in time (PIT)") + r = @client.open_point_in_time(index: @index, keep_alive: @scroll) + r['id'] + end + + def search_options(pit_id: , search_after: nil, slice_id: nil) + body = @query.merge({ + :pit => { + :id => pit_id, + :keep_alive => @scroll + } + }) + + # search_after requires at least a sort field explicitly + # we add default sort "_shard_doc": "asc" if the query doesn't have any sort field + # by default, ES adds the same implicitly on top of the provided "sort" + # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/paginate-search-results.html#CO201-2 + body = body.merge(:sort => {"_shard_doc": "asc"}) if @query&.dig("sort").nil? + + body = body.merge(:search_after => search_after) unless search_after.nil? + body = body.merge(:slice => {:id => slice_id, :max => @slices}) unless slice_id.nil? + { + :size => @size, + :body => body + } + end + + def next_page(pit_id: , search_after: nil, slice_id: nil) + options = search_options(pit_id: pit_id, search_after: search_after, slice_id: slice_id) + logger.trace("search options", options) + @client.search(options) + end + + def process_page(output_queue) + r = yield + r['hits']['hits'].each { |hit| @plugin.push_hit(hit, output_queue) } + + has_hits = r['hits']['hits'].any? + search_after = r['hits']['hits'][-1]['sort'] rescue nil + logger.warn("Query got data but the sort value is empty") if has_hits && search_after.nil? + [ has_hits, search_after ] + end + + def with_pit + pit_id = retryable(PIT_JOB) { create_pit } + yield pit_id if pit?(pit_id) + ensure + clear(pit_id) + end + + def search(output_queue:, slice_id: nil, pit_id:) + log_details = {} + log_details = log_details.merge({ slice_id: slice_id, slices: @slices }) unless slice_id.nil? + logger.info("Query start", log_details) + + has_hits = true + search_after = nil + + while has_hits && !@plugin.stop? 
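+            # page through the PIT, resuming each request from the previous
+            # page's last sort values (the search_after cursor)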
+ logger.debug("Query progress", log_details) + has_hits, search_after = process_page(output_queue) do + next_page(pit_id: pit_id, search_after: search_after, slice_id: slice_id) + end + end + + logger.info("Query completed", log_details) + end + + def retryable_search(output_queue) + with_pit do |pit_id| + retryable(SEARCH_AFTER_JOB) do + search(output_queue: output_queue, pit_id: pit_id) + end + end + end + + def retryable_slice_search(output_queue) + with_pit do |pit_id| + @slices.times.map do |slice_id| + Thread.new do + LogStash::Util::set_thread_name("[#{@pipeline_id}]|input|elasticsearch|slice_#{slice_id}") + retryable(SEARCH_AFTER_JOB) do + search(output_queue: output_queue, slice_id: slice_id, pit_id: pit_id) + end + end + end.map(&:join) + end + + logger.trace("#{@slices} slices completed") + end + + def clear(pit_id) + logger.info("Closing point in time (PIT)") + @client.close_point_in_time(:body => {:id => pit_id} ) if pit?(pit_id) + rescue => e + logger.debug("Ignoring close_point_in_time exception", message: e.message, exception: e.class) + end + end + + end + end +end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 8d4ee0c6..792b7d9d 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.18.0' + s.version = '4.19.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index d0b25f71..cda84888 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -18,7 +18,8 @@ let(:plugin) { described_class.new(config) } let(:queue) { Queue.new } let(:build_flavor) { "default" } - let(:cluster_info) { {"version" => {"number" => "7.5.0", "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} } + let(:es_version) { "7.5.0" } + let(:cluster_info) { {"version" => {"number" => es_version, "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} } before(:each) do Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method @@ -102,6 +103,26 @@ expect { plugin.register }.to raise_error(LogStash::ConfigurationError) end end + + context "search_api" do + before(:each) do + plugin.register + end + + context "ES 8" do + let(:es_version) { "8.10.0" } + it "resolves `auto` to `search_after`" do + expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::SearchAfter + end + end + + context "ES 7" do + let(:es_version) { "7.17.0" } + it "resolves `auto` to `scroll`" do + expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::Scroll + end + end + end end it_behaves_like "an interruptible input plugin" do @@ -244,22 +265,24 @@ context 'with `slices => 1`' do let(:slices) { 1 } + before { plugin.register } + it 'runs just one slice' do - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) + expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil) expect(Thread).to_not receive(:new) - plugin.register plugin.run([]) end end context 'without slices directive' do let(:config) { 
super().tap { |h| h.delete('slices') } } + before { plugin.register } + it 'runs just one slice' do - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<)) + expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil) expect(Thread).to_not receive(:new) - plugin.register plugin.run([]) end end @@ -267,13 +290,14 @@ 2.upto(8) do |slice_count| context "with `slices => #{slice_count}`" do let(:slices) { slice_count } + before { plugin.register } + it "runs #{slice_count} independent slices" do expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times slice_count.times do |slice_id| - expect(plugin).to receive(:do_run_slice).with(duck_type(:<<), slice_id) + expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), slice_id) end - plugin.register plugin.run([]) end end @@ -399,8 +423,8 @@ def synchronize_method!(object, method_name) expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) - synchronize_method!(plugin, :scroll_request) - synchronize_method!(plugin, :search_request) + synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page) + synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search) end let(:client) { Elasticsearch::Client.new } @@ -469,14 +493,14 @@ def synchronize_method!(object, method_name) expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom") - synchronize_method!(plugin, :scroll_request) - synchronize_method!(plugin, :search_request) + synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page) + synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search) end let(:client) { Elasticsearch::Client.new } it 'insert event to queue without waiting other slices' do - expect(plugin).to receive(:do_run_slice).twice.and_wrap_original do |m, *args| + expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).twice.and_wrap_original do |m, *args| q = args[0] slice_id = args[1] if slice_id == 0 @@ -996,7 +1020,7 @@ def wait_receive_request it "should properly schedule" do begin - expect(plugin).to receive(:do_run) { + expect(plugin.instance_variable_get(:@paginated_search)).to receive(:do_run) { queue << LogStash::Event.new({}) }.at_least(:twice) runner = Thread.start { plugin.run(queue) } @@ -1013,46 +1037,7 @@ def wait_receive_request end context "retries" do - let(:mock_response) do - { - "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", - "took" => 27, - "timed_out" => false, - "_shards" => { - "total" => 169, - "successful" => 169, - "failed" => 0 - }, - "hits" => { - "total" => 1, - "max_score" => 1.0, - "hits" => [ { - "_index" => "logstash-2014.10.12", - "_type" => "logs", - "_id" => "C5b2xLQwTZa76jBmHIbwHQ", - "_score" => 1.0, - "_source" => { "message" => ["ohayo"] } - } ] - } - } - end - - let(:mock_scroll_response) do - { - "_scroll_id" => "r453Wc1jh0caLJhSDg", - "hits" => { "hits" => [] } - } - end - - before(:each) do - client = Elasticsearch::Client.new - allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) - allow(client).to receive(:search).with(any_args).and_return(mock_response) - 
allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(mock_scroll_response) - allow(client).to receive(:clear_scroll).and_return(nil) - allow(client).to receive(:ping) - end - + let(:client) { Elasticsearch::Client.new } let(:config) do { "hosts" => ["localhost"], @@ -1061,29 +1046,98 @@ def wait_receive_request } end - it "retry and log error when all search request fail" do - expect(plugin.logger).to receive(:error).with(/Tried .* unsuccessfully/, - hash_including(:message => 'Manticore::UnknownException')) - expect(plugin.logger).to receive(:warn).twice.with(/Attempt to .* but failed/, - hash_including(:exception => "Manticore::UnknownException")) - expect(plugin).to receive(:search_request).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice) + shared_examples "a retryable plugin" do + it "retry and log error when all search request fail" do + expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), instance_of(Integer), instance_of(String)).twice + expect(client).to receive(:search).with(instance_of(Hash)).and_raise(Manticore::UnknownException).at_least(:twice) - plugin.register + plugin.register + + expect{ plugin.run(queue) }.not_to raise_error + end + + it "retry successfully when search request fail for one time" do + expect_any_instance_of(LogStash::Helpers::LoggableTry).to receive(:log_failure).with(instance_of(Manticore::UnknownException), 1, instance_of(String)) + expect(client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException) + expect(client).to receive(:search).with(instance_of(Hash)).once.and_return(search_response) - expect{ plugin.run(queue) }.not_to raise_error - expect(queue.size).to eq(0) + plugin.register + + expect{ plugin.run(queue) }.not_to raise_error + end end - it "retry successfully when search request fail for one time" do - expect(plugin.logger).to receive(:warn).once.with(/Attempt to .* but failed/, - hash_including(:exception => "Manticore::UnknownException")) - expect(plugin).to receive(:search_request).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException) - expect(plugin).to receive(:search_request).with(instance_of(Hash)).once.and_call_original + describe "scroll" do + let(:search_response) do + { + "_scroll_id" => "cXVlcnlUaGVuRmV0Y2g", + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ { + "_index" => "logstash-2014.10.12", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["ohayo"] } + } ] + } + } + end - plugin.register + let(:empty_scroll_response) do + { + "_scroll_id" => "r453Wc1jh0caLJhSDg", + "hits" => { "hits" => [] } + } + end + + before(:each) do + allow(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + allow(client).to receive(:scroll).with({ :body => { :scroll_id => "cXVlcnlUaGVuRmV0Y2g" }, :scroll=> "1m" }).and_return(empty_scroll_response) + allow(client).to receive(:clear_scroll).and_return(nil) + allow(client).to receive(:ping) + end + + it_behaves_like "a retryable plugin" + end + + describe "search_after" do + let(:es_version) { "8.10.0" } + let(:config) { super().merge({ "search_api" => "search_after" }) } + + let(:search_response) do + { + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 
169, + "successful" => 169, + "failed" => 0 + }, + "hits" => { + "total" => 1, + "max_score" => 1.0, + "hits" => [ ] # empty hits to break the loop + } + } + end + + before(:each) do + expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) + expect(client).to receive(:open_point_in_time).once.and_return({ "id" => "cXVlcnlUaGVuRmV0Y2g"}) + expect(client).to receive(:close_point_in_time).once.and_return(nil) + expect(client).to receive(:ping) + end - expect{ plugin.run(queue) }.not_to raise_error - expect(queue.size).to eq(1) + it_behaves_like "a retryable plugin" end end diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index ca11e42e..63301900 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -20,10 +20,7 @@ let(:password) { ENV['ELASTIC_PASSWORD'] || 'abc123' } let(:ca_file) { "spec/fixtures/test_certs/ca.crt" } - let(:es_url) do - es_url = ESHelper.get_host_port - SECURE_INTEGRATION ? "https://#{es_url}" : "http://#{es_url}" - end + let(:es_url) { "http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}" } let(:curl_args) do config['user'] ? "-u #{config['user']}:#{config['password']}" : '' @@ -46,6 +43,8 @@ ESHelper.curl_and_get_json_response "#{es_url}/_index_template/*", method: 'DELETE', args: curl_args # This can fail if there are no indexes, ignore failure. ESHelper.curl_and_get_json_response( "#{es_url}/_index/*", method: 'DELETE', args: curl_args) rescue nil + ESHelper.curl_and_get_json_response( "#{es_url}/logs", method: 'DELETE', args: curl_args) rescue nil + ESHelper.curl_and_get_json_response "#{es_url}/_refresh", method: 'POST', args: curl_args end shared_examples 'an elasticsearch index plugin' do @@ -56,6 +55,7 @@ it 'should retrieve json event from elasticsearch' do queue = [] plugin.run(queue) + expect(queue.size).to eq(10) event = queue.pop expect(event).to be_a(LogStash::Event) expect(event.get("response")).to eql(404) @@ -63,10 +63,6 @@ end describe 'against an unsecured elasticsearch', integration: true do - before(:each) do - plugin.register - end - it_behaves_like 'an elasticsearch index plugin' end @@ -136,4 +132,10 @@ end + describe 'slice', integration: true do + let(:config) { super().merge('slices' => 2, 'size' => 2) } + let(:plugin) { described_class.new(config) } + + it_behaves_like 'an elasticsearch index plugin' + end end diff --git a/spec/inputs/paginated_search_spec.rb b/spec/inputs/paginated_search_spec.rb new file mode 100644 index 00000000..50d062f0 --- /dev/null +++ b/spec/inputs/paginated_search_spec.rb @@ -0,0 +1,129 @@ +require "logstash/devutils/rspec/spec_helper" +require "logstash/inputs/elasticsearch/paginated_search" + +describe "Paginated search" do + let(:es_client) { double("Elasticsearch::Client") } + let(:settings) { { "index" => "logs", "query" => "{ \"sort\": [ \"_doc\" ] }", "scroll" => "1m", "retries" => 0, "size" => 1000 } } + let(:plugin) { double("LogStash::Inputs::Elasticsearch", params: settings, pipeline_id: "main", stop?: false) } + let(:pit_id) { "08fsAwILcmVzaGFyZC0yZmIWdzFnbl" } + + describe "search after" do + subject do + LogStash::Inputs::Elasticsearch::SearchAfter.new(es_client, plugin) + end + + describe "search options" do + context "query without sort" do + let(:settings) { super().merge({"query" => "{\"match_all\": {} }"}) } + + it "adds default sort" do + options = subject.search_options(pit_id: pit_id) + expect(options[:body][:sort]).to match({"_shard_doc": 
"asc"}) + end + end + + context "customize settings" do + let(:size) { 2 } + let(:slices) { 4 } + let(:settings) { super().merge({"slices" => slices, "size" => size}) } + + it "gives updated options" do + slice_id = 1 + search_after = [0, 0] + options = subject.search_options(pit_id: pit_id, slice_id: slice_id, search_after: search_after) + expect(options[:size]).to match(size) + expect(options[:body][:slice]).to match({:id => slice_id, :max => slices}) + expect(options[:body][:search_after]).to match(search_after) + end + end + end + + describe "search" do + let(:queue) { double("queue") } + let(:doc1) do + { + "_index" => "logstash", + "_type" => "logs", + "_id" => "C5b2xLQwTZa76jBmHIbwHQ", + "_score" => 1.0, + "_source" => { "message" => ["Halloween"] }, + "sort" => [0, 0] + } + end + let(:first_resp) do + { + "pit_id" => pit_id, + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 2, + "successful" => 2, + "skipped" => 0, + "failed" => 0 + }, + "hits" => { + "total" => { + "value" => 500, + "relation" => "eq" + }, + "hits" => [ doc1 ] + } + } + end + let(:last_resp) do + { + "pit_id" => pit_id, + "took" => 27, + "timed_out" => false, + "_shards" => { + "total" => 2, + "successful" => 2, + "skipped" => 0, + "failed" => 0 + }, + "hits" => { + "total" => { + "value" => 500, + "relation" => "eq" + }, + "hits" => [ ] # empty hits to break the loop + } + } + end + + context "happy case" do + it "runs" do + expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp) + expect(plugin).to receive(:push_hit).with(doc1, queue).once + subject.search(output_queue: queue, pit_id: pit_id) + end + end + + context "with exception" do + it "closes pit" do + expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id}) + expect(plugin).to receive(:push_hit).with(doc1, queue).once + expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_return(first_resp) + expect(es_client).to receive(:search).with(instance_of(Hash)).once.and_raise(Manticore::UnknownException) + expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil) + subject.retryable_search(queue) + end + end + + context "with slices" do + let(:slices) { 2 } + let(:settings) { super().merge({"slices" => slices}) } + + it "runs two slices" do + expect(es_client).to receive(:open_point_in_time).once.and_return({ "id" => pit_id}) + expect(plugin).to receive(:push_hit).with(any_args).twice + expect(Thread).to receive(:new).and_call_original.exactly(slices).times + expect(es_client).to receive(:search).with(instance_of(Hash)).and_return(first_resp, last_resp, first_resp, last_resp) + expect(es_client).to receive(:close_point_in_time).with(any_args).once.and_return(nil) + subject.retryable_slice_search(queue) + end + end + end + end + +end \ No newline at end of file From 5f72c60405d0ea0de16bbf14b350b88d7e3fdd37 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:45:10 -0500 Subject: [PATCH 159/207] Doc: Fix typo in heading id (#199) --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 7dfd5acb..1c876e01 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -381,7 +381,7 @@ This parameter controls the keepalive time in seconds of the scrolling request and initiates the scrolling process. The timeout applies per round trip (i.e. between the previous scroll request, to the next). 
-[id="plugins-{type}s-{plugin}-seearch_api"]
+[id="plugins-{type}s-{plugin}-search_api"]
 ===== `search_api`

 * Value can be any of: `auto`, `search_after`, `scroll`

From fac5191e7c11cba8fdd4e6e9401f4238fd3d479a Mon Sep 17 00:00:00 2001
From: Karen Metts <35154725+karenzone@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:53:38 -0500
Subject: [PATCH 160/207] Bump to v4.19.1 to pick up doc fix needed for docgen
 (#200)

---
 CHANGELOG.md | 3 +++
 logstash-input-elasticsearch.gemspec | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b79d08e6..081af97e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.19.1
+  - Plugin version bump to pick up docs fix in [#199](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/199) required to clear build error in docgen. [#200](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/200)
+
 ## 4.19.0
   - Added `search_api` option to support `search_after` and `scroll` [#198](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/198)
     - The default value `auto` uses `search_after` for Elasticsearch >= 8, otherwise, fall back to `scroll`

diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 792b7d9d..10eed7ac 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|

   s.name = 'logstash-input-elasticsearch'
-  s.version = '4.19.0'
+  s.version = '4.19.1'
   s.licenses = ['Apache License (2.0)']
   s.summary = "Reads query results from an Elasticsearch cluster"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

From 2577e6f466e1bc229c2b2b207a8ae499d671281c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Duarte?=
Date: Wed, 24 Jan 2024 14:39:01 +0000
Subject: [PATCH 161/207] Support aggregations (#202)

Add `response_type` configuration option to allow processing the result of
aggregations.

The default `hits` will generate one event per returned document (i.e. "hit"),
which is the current behavior.

When set to `aggregations`, a single Logstash event will be generated with the
contents of the `aggregations` object of the query's response. In this case the
`hits` object will be ignored. The parameter `size` will always be set to 0
regardless of the default or user-defined value set in this plugin.

---------

Co-authored-by: MikhailMS <1488maiklm@gmail.com>
---
 CHANGELOG.md | 3 +
 docs/index.asciidoc | 15 ++++
 lib/logstash/inputs/elasticsearch.rb | 34 +++++---
 .../inputs/elasticsearch/aggregation.rb | 45 +++++++++++
 logstash-input-elasticsearch.gemspec | 2 +-
 spec/inputs/elasticsearch_spec.rb | 77 ++++++++++++++++---
 6 files changed, 153 insertions(+), 23 deletions(-)
 create mode 100644 lib/logstash/inputs/elasticsearch/aggregation.rb

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 081af97e..81f20b12 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.20.0
+  - Added `response_type` configuration option to allow processing the result of aggregations [#202](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/202)
+
 ## 4.19.1
   - Plugin version bump to pick up docs fix in [#199](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/199) required to clear build error in docgen. 
[#200](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/200)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 1c876e01..c99a4864 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -115,6 +115,7 @@ This plugin supports the following configuration options plus the <>
 | <> |<>|No
 | <> |<>|No
+| <> |<>, one of `["hits","aggregations"]`|No
 | <> | <>|No
 | <> |<>|No
 | <> |<>|No
@@ -337,6 +338,20 @@ documentation] for more information.
 When <> resolves to `search_after` and the query does not specify `sort`,
 the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter to know more.

+[id="plugins-{type}s-{plugin}-response_type"]
+===== `response_type`
+
+  * Value can be any of: `hits`, `aggregations`
+  * Default value is `hits`
+
+Which part of the result to transform into Logstash events when processing the
+response from the query.
+The default `hits` will generate one event per returned document (i.e. "hit").
+When set to `aggregations`, a single Logstash event will be generated with the
+contents of the `aggregations` object of the query's response. In this case the
+`hits` object will be ignored. The parameter `size` will always be set to
+0 regardless of the default or user-defined value set in this plugin.
+
 [id="plugins-{type}s-{plugin}-request_timeout_seconds"]
 ===== `request_timeout_seconds`

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index 7f3109c2..59aa036f 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -74,6 +74,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base

   require 'logstash/inputs/elasticsearch/paginated_search'
+  require 'logstash/inputs/elasticsearch/aggregation'

   include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
   include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck
@@ -101,6 +102,11 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
   config :query, :validate => :string, :default => '{ "sort": [ "_doc" ] }'

+  # This allows you to specify the response type: either hits or aggregations
+  # where hits: normal search request
+  # aggregations: aggregation request
+  config :response_type, :validate => ['hits', 'aggregations'], :default => 'hits'
+
   # This allows you to set the maximum number of hits returned per scroll.
   config :size, :validate => :number, :default => 1000

@@ -282,11 +288,6 @@ def register
     fill_hosts_from_cloud_id
     setup_ssl_params!
- @options = { - :index => @index, - :scroll => @scroll, - :size => @size - } @base_query = LogStash::Json.load(@query) if @slices @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option") @@ -328,21 +329,25 @@ def register setup_search_api + setup_query_executor + @client end - def run(output_queue) if @schedule - scheduler.cron(@schedule) { @paginated_search.do_run(output_queue) } + scheduler.cron(@schedule) { @query_executor.do_run(output_queue) } scheduler.join else - @paginated_search.do_run(output_queue) + @query_executor.do_run(output_queue) end end - def push_hit(hit, output_queue) - event = targeted_event_factory.new_event hit['_source'] + ## + # This can be called externally from the query_executor + public + def push_hit(hit, output_queue, root_field = '_source') + event = targeted_event_factory.new_event hit[root_field] set_docinfo_fields(hit, event) if @docinfo decorate(event) output_queue << event @@ -643,13 +648,20 @@ def setup_search_api @search_api end + end - @paginated_search = if @resolved_search_api == "search_after" + def setup_query_executor + @query_executor = case @response_type + when 'hits' + if @resolved_search_api == "search_after" LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self) else logger.warn("scroll API is no longer recommended for pagination. Consider using search_after instead.") if es_major_version >= 8 LogStash::Inputs::Elasticsearch::Scroll.new(@client, self) end + when 'aggregations' + LogStash::Inputs::Elasticsearch::Aggregation.new(@client, self) + end end module URIOrEmptyValidator diff --git a/lib/logstash/inputs/elasticsearch/aggregation.rb b/lib/logstash/inputs/elasticsearch/aggregation.rb new file mode 100644 index 00000000..7f26497b --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/aggregation.rb @@ -0,0 +1,45 @@ +require 'logstash/helpers/loggable_try' + +module LogStash + module Inputs + class Elasticsearch + class Aggregation + include LogStash::Util::Loggable + + AGGREGATION_JOB = "aggregation" + + def initialize(client, plugin) + @client = client + @plugin_params = plugin.params + + @size = @plugin_params["size"] + @query = @plugin_params["query"] + @retries = @plugin_params["retries"] + @agg_options = { + :index => @index, + :size => 0 + }.merge(:body => @query) + + @plugin = plugin + end + + def retryable(job_name, &block) + stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name) + stud_try.try((@retries + 1).times) { yield } + rescue => e + error_details = {:message => e.message, :cause => e.cause} + error_details[:backtrace] = e.backtrace if logger.debug? 
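+        # every retry failed; log the details for troubleshooting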
+ logger.error("Tried #{job_name} unsuccessfully", error_details) + end + + def do_run(output_queue) + logger.info("Aggregation starting") + r = retryable(AGGREGATION_JOB) do + @client.search(@agg_options) + end + @plugin.push_hit(r, output_queue, 'aggregations') + end + end + end + end +end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 10eed7ac..29d4d1bd 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.19.1' + s.version = '4.20.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index cda84888..3ede1cc3 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -112,14 +112,14 @@ context "ES 8" do let(:es_version) { "8.10.0" } it "resolves `auto` to `search_after`" do - expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::SearchAfter + expect(plugin.instance_variable_get(:@query_executor)).to be_a LogStash::Inputs::Elasticsearch::SearchAfter end end context "ES 7" do let(:es_version) { "7.17.0" } it "resolves `auto` to `scroll`" do - expect(plugin.instance_variable_get(:@paginated_search)).to be_a LogStash::Inputs::Elasticsearch::Scroll + expect(plugin.instance_variable_get(:@query_executor)).to be_a LogStash::Inputs::Elasticsearch::Scroll end end end @@ -268,7 +268,7 @@ before { plugin.register } it 'runs just one slice' do - expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil) + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).with(duck_type(:<<), nil) expect(Thread).to_not receive(:new) plugin.run([]) @@ -280,7 +280,7 @@ before { plugin.register } it 'runs just one slice' do - expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), nil) + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).with(duck_type(:<<), nil) expect(Thread).to_not receive(:new) plugin.run([]) @@ -295,7 +295,7 @@ it "runs #{slice_count} independent slices" do expect(Thread).to receive(:new).and_call_original.exactly(slice_count).times slice_count.times do |slice_id| - expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).with(duck_type(:<<), slice_id) + expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).with(duck_type(:<<), slice_id) end plugin.run([]) @@ -423,8 +423,8 @@ def synchronize_method!(object, method_name) expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0) expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_return(slice1_response1) - synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page) - synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search) + synchronize_method!(plugin.instance_variable_get(:@query_executor), :next_page) + synchronize_method!(plugin.instance_variable_get(:@query_executor), :initial_search) end let(:client) { Elasticsearch::Client.new } @@ 
-493,14 +493,14 @@ def synchronize_method!(object, method_name)
expect(client).to receive(:search).with(hash_including(:body => slice1_query)).and_return(slice1_response0)
expect(client).to receive(:scroll).with(hash_including(:body => { :scroll_id => slice1_scroll1 })).and_raise("boom")

- synchronize_method!(plugin.instance_variable_get(:@paginated_search), :next_page)
- synchronize_method!(plugin.instance_variable_get(:@paginated_search), :initial_search)
+ synchronize_method!(plugin.instance_variable_get(:@query_executor), :next_page)
+ synchronize_method!(plugin.instance_variable_get(:@query_executor), :initial_search)
end

let(:client) { Elasticsearch::Client.new }

it 'insert event to queue without waiting other slices' do
- expect(plugin.instance_variable_get(:@paginated_search)).to receive(:search).twice.and_wrap_original do |m, *args|
+ expect(plugin.instance_variable_get(:@query_executor)).to receive(:search).twice.and_wrap_original do |m, *args|
q = args[0]
slice_id = args[1]
if slice_id == 0
@@ -1020,7 +1020,7 @@ def wait_receive_request

it "should properly schedule" do
begin
- expect(plugin.instance_variable_get(:@paginated_search)).to receive(:do_run) {
+ expect(plugin.instance_variable_get(:@query_executor)).to receive(:do_run) {
queue << LogStash::Event.new({})
}.at_least(:twice)
runner = Thread.start { plugin.run(queue) }
@@ -1033,7 +1033,62 @@ def wait_receive_request
runner.join if runner
end
end
+ end
+
+ context "aggregations" do
+ let(:config) do
+ {
+ 'hosts' => ["localhost"],
+ 'query' => '{ "query": {}, "size": 0, "aggs":{"total_count": { "value_count": { "field": "type" }}, "empty_count": { "sum": { "field": "_meta.empty_event" }}}}',
+ 'response_type' => 'aggregations',
+ 'size' => 0
+ }
+ end
+
+ let(:mock_response) do
+ {
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => {
+ "total" => 169,
+ "successful" => 169,
+ "skipped" => 0,
+ "failed" => 0
+ },
+ "hits" => {
+ "total" => 10,
+ "max_score" => 1.0,
+ "hits" => []
+ },
+ "aggregations" => {
+ "total_counter" => {
+ "value" => 10
+ },
+ "empty_counter" => {
+ "value" => 5
+ },
+ }
+ }
+ end
+
+ before(:each) do
+ client = Elasticsearch::Client.new
+
+ expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client)
+ expect(client).to receive(:search).with(any_args).and_return(mock_response)
+ expect(client).to receive(:ping)
+ end
+
+ before { plugin.register }
+
+ it 'creates the events from the aggregations' do
+ plugin.run queue
+ event = queue.pop
+
+ expect(event).to be_a(LogStash::Event)
+ expect(event.get("[total_counter][value]")).to eql 10
+ expect(event.get("[empty_counter][value]")).to eql 5
+ end
end

context "retries" do

From a878179c8a5c2463248738b3a0bb3513ded78c32 Mon Sep 17 00:00:00 2001
From: kaisecheng <69120390+kaisecheng@users.noreply.github.com>
Date: Wed, 14 Feb 2024 09:56:10 +0000
Subject: [PATCH 162/207] Fix license header (#203)

This plugin is an OSS distribution.
An Elastic License header was wrongly added to a file.
---
CHANGELOG.md | 3 +++
lib/logstash/helpers/loggable_try.rb | 4 ----
logstash-input-elasticsearch.gemspec | 2 +-
3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 81f20b12..99c6d1e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.20.1
+ - Fix license header [#203](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/203)
+
## 4.20.0
- Added `response_type` configuration option to allow processing result of aggregations [#202](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/202)

diff --git a/lib/logstash/helpers/loggable_try.rb b/lib/logstash/helpers/loggable_try.rb
index d8030bac..07e5cea5 100644
--- a/lib/logstash/helpers/loggable_try.rb
+++ b/lib/logstash/helpers/loggable_try.rb
@@ -1,7 +1,3 @@
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License;
-# you may not use this file except in compliance with the Elastic License.
-
require 'stud/try'

module LogStash module Helpers

diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 29d4d1bd..d8127dca 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|

s.name = 'logstash-input-elasticsearch'
- s.version = '4.20.0'
+ s.version = '4.20.1'
s.licenses = ['Apache License (2.0)']
s.summary = "Reads query results from an Elasticsearch cluster"
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

From c04401b4f1b3460ec7bd920c90223f22c88fce02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Duarte?= 
Date: Tue, 5 Mar 2024 09:41:28 +0000
Subject: [PATCH 163/207] fix case when aggregation returns an error (#204)

---
CHANGELOG.md | 3 +++
lib/logstash/inputs/elasticsearch/aggregation.rb | 3 ++-
logstash-input-elasticsearch.gemspec | 2 +-
spec/inputs/elasticsearch_spec.rb | 15 ++++++++++++---
4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99c6d1e5..3c38e04c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 4.20.2
+ - fix case when aggregation returns an error [#204](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/204)
+
## 4.20.1
- Fix license header [#203](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/203)

diff --git a/lib/logstash/inputs/elasticsearch/aggregation.rb b/lib/logstash/inputs/elasticsearch/aggregation.rb
index 7f26497b..0855cb87 100644
--- a/lib/logstash/inputs/elasticsearch/aggregation.rb
+++ b/lib/logstash/inputs/elasticsearch/aggregation.rb
@@ -30,6 +30,7 @@ def retryable(job_name, &block)
error_details = {:message => e.message, :cause => e.cause}
error_details[:backtrace] = e.backtrace if logger.debug?
logger.error("Tried #{job_name} unsuccessfully", error_details) + false end def do_run(output_queue) @@ -37,7 +38,7 @@ def do_run(output_queue) r = retryable(AGGREGATION_JOB) do @client.search(@agg_options) end - @plugin.push_hit(r, output_queue, 'aggregations') + @plugin.push_hit(r, output_queue, 'aggregations') if r end end end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d8127dca..42a81c01 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.20.1' + s.version = '4.20.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 3ede1cc3..21b9509e 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1071,17 +1071,16 @@ def wait_receive_request } end + let(:client) { Elasticsearch::Client.new } before(:each) do - client = Elasticsearch::Client.new - expect(Elasticsearch::Client).to receive(:new).with(any_args).and_return(client) - expect(client).to receive(:search).with(any_args).and_return(mock_response) expect(client).to receive(:ping) end before { plugin.register } it 'creates the events from the aggregations' do + expect(client).to receive(:search).with(any_args).and_return(mock_response) plugin.run queue event = queue.pop @@ -1089,6 +1088,16 @@ def wait_receive_request expect(event.get("[total_counter][value]")).to eql 10 expect(event.get("[empty_counter][value]")).to eql 5 end + + context "when there's an exception" do + before(:each) do + allow(client).to receive(:search).and_raise RuntimeError + end + it 'produces no events' do + plugin.run queue + expect(queue).to be_empty + end + end end context "retries" do From f6a5b65b7bb76f069861db134e5544c84a5fe6e3 Mon Sep 17 00:00:00 2001 From: Karen Metts <35154725+karenzone@users.noreply.github.com> Date: Tue, 11 Jun 2024 18:21:28 -0400 Subject: [PATCH 164/207] Doc: Update link to resolve to target content (#206) Bump to v4.20.3 --- CHANGELOG.md | 3 +++ docs/index.asciidoc | 8 ++++---- logstash-input-elasticsearch.gemspec | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c38e04c..d6190add 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.20.3 + - [DOC] Update link to bypass redirect, resolving directly to correct content [#206](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/206) + ## 4.20.2 - fix case when aggregation returns an error [#204](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/204) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index c99a4864..a317c52f 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -301,10 +301,10 @@ can be either IP, HOST, IP:port, or HOST:port. The port defaults to * Value type is <> * Default value is `"logstash-*"` -The index or alias to search. See {ref}/multi-index.html[Multi Indices -documentation] in the Elasticsearch documentation for more information on how to -reference multiple indices. - +The index or alias to search. 
+Check out {ref}/api-conventions.html#api-multi-index[Multi Indices +documentation] in the Elasticsearch documentation for info on +referencing multiple indices. [id="plugins-{type}s-{plugin}-password"] ===== `password` diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 42a81c01..93f5fbc8 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.20.2' + s.version = '4.20.3' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From ef8874c3339c6c7e06f93e47e40fa80064dfeb7f Mon Sep 17 00:00:00 2001 From: kaisecheng <69120390+kaisecheng@users.noreply.github.com> Date: Fri, 23 Aug 2024 01:04:39 +0100 Subject: [PATCH 165/207] Fix aggregation to respect `index` param (#209) Fix issue where the `index` parameter was being ignored when using `response_type => aggregations` Fixed: #208 --------- Co-authored-by: Ry Biesemeyer --- .travis.yml | 2 -- CHANGELOG.md | 3 +++ lib/logstash/inputs/elasticsearch/aggregation.rb | 2 +- logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 6 ++++-- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1ba5da93..0ed7babe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,10 +9,8 @@ env: - INTEGRATION=false ELASTIC_STACK_VERSION=8.x - INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true - INTEGRATION=false ELASTIC_STACK_VERSION=8.x SNAPSHOT=true -- INTEGRATION=false ELASTIC_STACK_VERSION=7.16.3 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.15.0 - INTEGRATION=true ELASTIC_STACK_VERSION=7.x - INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true LOG_LEVEL=info - INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true LOG_LEVEL=info - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x LOG_LEVEL=info - SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3 -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.16.3 MANTICORE_VERSION=0.7.1 ELASTICSEARCH_VERSION=7.14.1 LOG_LEVEL=info diff --git a/CHANGELOG.md b/CHANGELOG.md index d6190add..8a3fe7e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.20.4 + - Fix issue where the `index` parameter was being ignored when using `response_type => aggregations` [#209](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/209) + ## 4.20.3 - [DOC] Update link to bypass redirect, resolving directly to correct content [#206](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/206) diff --git a/lib/logstash/inputs/elasticsearch/aggregation.rb b/lib/logstash/inputs/elasticsearch/aggregation.rb index 0855cb87..e74a4357 100644 --- a/lib/logstash/inputs/elasticsearch/aggregation.rb +++ b/lib/logstash/inputs/elasticsearch/aggregation.rb @@ -16,7 +16,7 @@ def initialize(client, plugin) @query = @plugin_params["query"] @retries = @plugin_params["retries"] @agg_options = { - :index => @index, + :index => @plugin_params["index"], :size => 0 }.merge(:body => @query) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 93f5fbc8..ed79adc3 100644 --- 
a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|

s.name = 'logstash-input-elasticsearch'
- s.version = '4.20.3'
+ s.version = '4.20.4'
s.licenses = ['Apache License (2.0)']
s.summary = "Reads query results from an Elasticsearch cluster"
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 21b9509e..652c2996 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -1036,12 +1036,14 @@ def wait_receive_request
end

context "aggregations" do
+ let(:index_name) { "rainbow" }
let(:config) do
{
'hosts' => ["localhost"],
'query' => '{ "query": {}, "size": 0, "aggs":{"total_count": { "value_count": { "field": "type" }}, "empty_count": { "sum": { "field": "_meta.empty_event" }}}}',
'response_type' => 'aggregations',
- 'size' => 0
+ 'size' => 0,
+ 'index' => index_name
}
end

@@ -1080,7 +1082,7 @@ def wait_receive_request
before { plugin.register }

it 'creates the events from the aggregations' do
- expect(client).to receive(:search).with(any_args).and_return(mock_response)
+ expect(client).to receive(:search).with(hash_including(:body => anything, :size => 0, :index => index_name)).and_return(mock_response)
plugin.run queue
event = queue.pop

From af746246f80175b5579bc64414d538baadc2065c Mon Sep 17 00:00:00 2001
From: Cas Donoghue 
Date: Mon, 18 Nov 2024 01:43:19 -0800
Subject: [PATCH 166/207] Update travis config for new test matrix (#214)

This commit updates the travis config to reflect the new release
stream for logstash. In addition to the new test cells, we use the
docker-setup.sh script from the upstream `.ci` repo.
---
.ci/docker-setup.sh | 60 ---------------------------------------------
.travis.yml | 22 +++++++++--------
2 files changed, 12 insertions(+), 70 deletions(-)
delete mode 100755 .ci/docker-setup.sh

diff --git a/.ci/docker-setup.sh b/.ci/docker-setup.sh
deleted file mode 100755
index 6b6aa730..00000000
--- a/.ci/docker-setup.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash

-# This is intended to be run the plugin's root directory. `.ci/docker-setup.sh`
-# Ensure you have Docker installed locally and set the ELASTIC_STACK_VERSION environment variable.
-set -e - -VERSION_URL="https://raw.githubusercontent.com/elastic/logstash/master/ci/logstash_releases.json" - - -pull_docker_snapshot() { - project="${1?project name required}" - local docker_image="docker.elastic.co/${project}/${project}:${ELASTIC_STACK_VERSION}" - echo "Pulling $docker_image" - docker pull "$docker_image" -} - -if [ "$ELASTIC_STACK_VERSION" ]; then - echo "Fetching versions from $VERSION_URL" - VERSIONS=$(curl --silent $VERSION_URL) - if [[ "$SNAPSHOT" = "true" ]]; then - ELASTIC_STACK_RETRIEVED_VERSION=$(echo $VERSIONS | jq '.snapshots."'"$ELASTIC_STACK_VERSION"'"') - echo $ELASTIC_STACK_RETRIEVED_VERSION - else - ELASTIC_STACK_RETRIEVED_VERSION=$(echo $VERSIONS | jq '.releases."'"$ELASTIC_STACK_VERSION"'"') - fi - if [[ "$ELASTIC_STACK_RETRIEVED_VERSION" != "null" ]]; then - # remove starting and trailing double quotes - ELASTIC_STACK_RETRIEVED_VERSION="${ELASTIC_STACK_RETRIEVED_VERSION%\"}" - ELASTIC_STACK_RETRIEVED_VERSION="${ELASTIC_STACK_RETRIEVED_VERSION#\"}" - echo "Translated $ELASTIC_STACK_VERSION to ${ELASTIC_STACK_RETRIEVED_VERSION}" - export ELASTIC_STACK_VERSION=$ELASTIC_STACK_RETRIEVED_VERSION - fi - - echo "Testing against version: $ELASTIC_STACK_VERSION" - - if [[ "$ELASTIC_STACK_VERSION" = *"-SNAPSHOT" ]]; then - pull_docker_snapshot "logstash" - if [ "$INTEGRATION" == "true" ]; then - pull_docker_snapshot "elasticsearch" - fi - fi - - if [ -f Gemfile.lock ]; then - rm Gemfile.lock - fi - - cd .ci - - if [ "$INTEGRATION" == "true" ]; then - docker-compose down - docker-compose build - else - docker-compose down - docker-compose build logstash - fi -else - echo "Please set the ELASTIC_STACK_VERSION environment variable" - echo "For example: export ELASTIC_STACK_VERSION=6.2.4" - exit 1 -fi diff --git a/.travis.yml b/.travis.yml index 0ed7babe..ee11240d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,13 +4,15 @@ import: before_install: - sudo sysctl -w vm.max_map_count=262144 # due ES bootstrap requirements -env: -- INTEGRATION=false ELASTIC_STACK_VERSION=7.x -- INTEGRATION=false ELASTIC_STACK_VERSION=8.x -- INTEGRATION=false ELASTIC_STACK_VERSION=7.x SNAPSHOT=true -- INTEGRATION=false ELASTIC_STACK_VERSION=8.x SNAPSHOT=true -- INTEGRATION=true ELASTIC_STACK_VERSION=7.x -- INTEGRATION=true ELASTIC_STACK_VERSION=7.x SNAPSHOT=true LOG_LEVEL=info -- INTEGRATION=true ELASTIC_STACK_VERSION=8.x SNAPSHOT=true LOG_LEVEL=info -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x LOG_LEVEL=info -- SECURE_INTEGRATION=true INTEGRATION=true ELASTIC_STACK_VERSION=7.x ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3 +jobs: + include: + - stage: "Integration Tests" + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.previous + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.next + - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.future + - stage: "Secure Integration Tests" + - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current SNAPSHOT=true + - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current + - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3 From 4fec6d8a5878d888eb9779fada4e03a8cfc9c6a2 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jo=C3=A3o=20Duarte?= Date: Wed, 20 Nov 2024 19:06:57 +0000 Subject: [PATCH 167/207] Update .travis.yml to properly separate testing stages (#215) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ee11240d..23a3c486 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,12 +7,12 @@ before_install: jobs: include: - stage: "Integration Tests" - - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current + env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.previous - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.next - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.future - stage: "Secure Integration Tests" - - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current SNAPSHOT=true + env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current SNAPSHOT=true - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current ES_SSL_SUPPORTED_PROTOCOLS=TLSv1.3 From f49b71066b8ae1f9e1c306f39f018f38616d679c Mon Sep 17 00:00:00 2001 From: Cas Donoghue Date: Thu, 5 Dec 2024 12:55:00 -0800 Subject: [PATCH 168/207] Set x-elastic-product-origin header for ES requests (#211) * Set x-elastic-product-origin header for ES requests This commit updates the `Elasticsearch::Client` used to make requests to ES to send along a header identifying the request as originating from an internal component. * Prep 4.20.5 release Update version and add CHANGELOG entry. 
--- CHANGELOG.md | 3 +++ lib/logstash/inputs/elasticsearch.rb | 2 ++ logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_spec.rb | 12 ++++++++++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a3fe7e4..c3d053f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.20.5 + - Add `x-elastic-product-origin` header to Elasticsearch requests [#211](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/211) + ## 4.20.4 - Fix issue where the `index` parameter was being ignored when using `response_type => aggregations` [#209](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/209) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 59aa036f..f0faa91b 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -271,6 +271,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base BUILD_FLAVOR_SERVERLESS = 'serverless'.freeze DEFAULT_EAV_HEADER = { "Elastic-Api-Version" => "2023-10-31" }.freeze + INTERNAL_ORIGIN_HEADER = { 'x-elastic-product-origin' => 'logstash-input-elasticsearch'}.freeze def initialize(params={}) super(params) @@ -300,6 +301,7 @@ def register fill_user_password_from_cloud_auth transport_options = {:headers => {}} + transport_options[:headers].merge!(INTERNAL_ORIGIN_HEADER) transport_options[:headers].merge!(setup_basic_auth(user, password)) transport_options[:headers].merge!(setup_api_key(api_key)) transport_options[:headers].merge!({'user-agent' => prepare_user_agent()}) diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index ed79adc3..d00115a5 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.20.4' + s.version = '4.20.5' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program"

diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 652c2996..bddb6cf7 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -50,6 +50,12 @@
client = plugin.send(:client)
expect( extract_transport(client).options[:transport_options][:headers] ).not_to match hash_including("Elastic-Api-Version" => "2023-10-31")
end
+
+ it "sets an x-elastic-product-origin header identifying this as an internal plugin request" do
+ plugin.register
+ client = plugin.send(:client)
+ expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("x-elastic-product-origin"=>"logstash-input-elasticsearch")
+ end
end

context "against not authentic Elasticsearch" do
@@ -90,6 +96,12 @@
client = plugin.send(:client)
expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("Elastic-Api-Version" => "2023-10-31")
end
+
+ it "sets an x-elastic-product-origin header identifying this as an internal plugin request" do
+ plugin.register
+ client = plugin.send(:client)
+ expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("x-elastic-product-origin"=>"logstash-input-elasticsearch")
+ end
end
end

From 56d923f9d95d22b6ea3cc16737b6e9234d8a52ee Mon Sep 17 00:00:00 2001
From: Cas Donoghue 
Date: Mon, 16 Dec 2024 15:45:53 -0800
Subject: [PATCH 169/207] Remove deprecated SSL settings and simplify SSL
 configuration (#213)

* Mark deprecated SSL settings as obsolete

This commit updates SSL settings to be marked as obsolete:
- Replace `ssl` with `ssl_enabled`
- Replace `ca_file` with `ssl_certificate_authorities`
- Replace `ssl_certificate_verification` with `ssl_verification_mode`

`setup_ssl_params!` has been updated to only handle SSL inference when not explicitly configured.

* Fix accidental removal of changelog entry

During a rebase the latest 4 series entry was deleted. This restores it.
---
CHANGELOG.md | 8 ++++
docs/index.asciidoc | 60 ++++++++--------------------
lib/logstash/inputs/elasticsearch.rb | 54 ++++--------------------
logstash-input-elasticsearch.gemspec | 2 +-
spec/inputs/elasticsearch_spec.rb | 13 ++++++
5 files changed, 46 insertions(+), 91 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3d053f7..e70053a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## 5.0.0
+ - SSL settings that were marked deprecated in version `4.17.0` are now marked obsolete, and will prevent the plugin from starting.
+ - These settings are:
+ - `ssl`, which should be replaced by `ssl_enabled`
+ - `ca_file`, which should be replaced by `ssl_certificate_authorities`
+ - `ssl_certificate_verification`, which should be replaced by `ssl_verification_mode`
+ - [#213](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/213)
+
## 4.20.5
- Add `x-elastic-product-origin` header to Elasticsearch requests [#211](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/211)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index a317c52f..04a50eaf 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -96,7 +96,10 @@ TIP: Set the `target` option to avoid potential schema conflicts.
[id="plugins-{type}s-{plugin}-options"]
==== Elasticsearch Input configuration options

-This plugin supports the following configuration options plus the <> and the <> described later.
+This plugin supports these configuration options plus the <> described later.
+
+NOTE: As of version `5.0.0` of this plugin, a number of previously deprecated settings related to SSL have been removed.
+Please check out <> for details.

[cols="<,<,<",options="header",]
|=======================================================================
@@ -478,6 +484,8 @@ Enable SSL/TLS secured communication to Elasticsearch cluster. Leaving this
unspecified will use whatever scheme is specified in the URLs listed in <> or extracted from the <>. If no explicit protocol is specified plain HTTP will be used.

+When not explicitly set, SSL will be automatically enabled if any of the specified hosts use HTTPS.
+
[id="plugins-{type}s-{plugin}-ssl_key"]
===== `ssl_key`
* Value type is <>
@@ -608,56 +616,21 @@ option when authenticating to the Elasticsearch server. If set to an
empty string authentication will be disabled.

-[id="plugins-{type}s-{plugin}-deprecated-options"]
-==== Elasticsearch Input deprecated configuration options
+[id="plugins-{type}s-{plugin}-obsolete-options"]
+==== Elasticsearch Input Obsolete Configuration Options

-This plugin supports the following deprecated configurations.
+WARNING: As of version `5.0.0` of this plugin, some configuration options have been replaced.
+The plugin will fail to start if it contains any of these obsolete options.

-WARNING: Deprecated options are subject to removal in future releases.

-[cols="<,<,<",options="header",]
+[cols="<,<",options="header",]
|=======================================================================
-|Setting|Input type|Replaced by
-| <> |a valid filesystem path|<>
-| <> |<>|<>
-| <> |<>|<>
+|Setting|Replaced by
+| ca_file | <>
+| ssl | <>
+| ssl_certificate_verification | <>
|=======================================================================

-[id="plugins-{type}s-{plugin}-ca_file"]
-===== `ca_file`
-deprecated[4.17.0, Replaced by <>]
-
-* Value type is <>
-* There is no default value for this setting.
-
-SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary.
-
-[id="plugins-{type}s-{plugin}-ssl"]
-===== `ssl`
-deprecated[4.17.0, Replaced by <>]
-
-* Value type is <>
-* Default value is `false`
-
-If enabled, SSL will be used when communicating with the Elasticsearch
-server (i.e. HTTPS will be used instead of plain HTTP).
-
-
-[id="plugins-{type}s-{plugin}-ssl_certificate_verification"]
-===== `ssl_certificate_verification`
-deprecated[4.17.0, Replaced by <>]
-
-* Value type is <>
-* Default value is `true`
-
-Option to validate the server's certificate. Disabling this severely compromises security.
-When certificate validation is disabled, this plugin implicitly trusts the machine
-resolved at the given address without validating its proof-of-identity.
-In this scenario, the plugin can transmit credentials to or process data from an untrustworthy
-man-in-the-middle or other compromised infrastructure.
-More information on the importance of certificate verification:
-**https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf**.
- [id="plugins-{type}s-{plugin}-common-options"] include::{include_path}/{type}.asciidoc[] diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index f0faa91b..cbf92220 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -198,23 +198,12 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # Set the address of a forward HTTP proxy. config :proxy, :validate => :uri_or_empty - # SSL - config :ssl, :validate => :boolean, :default => false, :deprecated => "Set 'ssl_enabled' instead." - - # SSL Certificate Authority file in PEM encoded format, must also include any chain certificates as necessary - config :ca_file, :validate => :path, :deprecated => "Set 'ssl_certificate_authorities' instead." - # OpenSSL-style X.509 certificate certificate to authenticate the client config :ssl_certificate, :validate => :path # SSL Certificate Authority files in PEM encoded format, must also include any chain certificates as necessary config :ssl_certificate_authorities, :validate => :path, :list => true - # Option to validate the server's certificate. Disabling this severely compromises security. - # For more information on the importance of certificate verification please read - # https://www.cs.utexas.edu/~shmat/shmat_ccs12.pdf - config :ssl_certificate_verification, :validate => :boolean, :default => true, :deprecated => "Set 'ssl_verification_mode' instead." - # The list of cipher suites to use, listed by priorities. # Supported cipher suites vary depending on which version of Java is used. config :ssl_cipher_suites, :validate => :string, :list => true @@ -242,7 +231,6 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base config :ssl_truststore_password, :validate => :password # The JKS truststore to validate the server's certificate. - # Use either `:ssl_truststore_path` or `:ssl_certificate_authorities` config :ssl_truststore_path, :validate => :path # The format of the truststore file. It must be either jks or pkcs12 @@ -264,6 +252,11 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # If set, the _source of each hit will be added nested under the target instead of at the top-level config :target, :validate => :field_reference + # Obsolete Settings + config :ssl, :obsolete => "Set 'ssl_enabled' instead." + config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." + config :ssl_certificate_verification, :obsolete => "Set 'ssl_verification_mode' instead." + # config :ca_trusted_fingerprint, :validate => :sha_256_hex include LogStash::PluginMixins::CATrustedFingerprintSupport @@ -408,8 +401,6 @@ def setup_client_ssl ssl_options[:ssl] = true if @ssl_enabled unless @ssl_enabled - # Keep it backward compatible with the deprecated `ssl` option - ssl_options[:trust_strategy] = trust_strategy_for_ca_trusted_fingerprint if original_params.include?('ssl') return ssl_options end @@ -473,38 +464,11 @@ def setup_client_ssl end def setup_ssl_params! - @ssl_enabled = normalize_config(:ssl_enabled) do |normalize| - normalize.with_deprecated_alias(:ssl) - end - - # Infer the value if neither the deprecate `ssl` and `ssl_enabled` were set - infer_ssl_enabled_from_hosts - - @ssl_certificate_authorities = normalize_config(:ssl_certificate_authorities) do |normalize| - normalize.with_deprecated_mapping(:ca_file) do |ca_file| - [ca_file] - end + # Only infer ssl_enabled if it wasn't explicitly set + unless original_params.include?('ssl_enabled') + @ssl_enabled = effectively_ssl? 
+ params['ssl_enabled'] = @ssl_enabled end - - @ssl_verification_mode = normalize_config(:ssl_verification_mode) do |normalize| - normalize.with_deprecated_mapping(:ssl_certificate_verification) do |ssl_certificate_verification| - if ssl_certificate_verification == true - "full" - else - "none" - end - end - end - - params['ssl_enabled'] = @ssl_enabled - params['ssl_certificate_authorities'] = @ssl_certificate_authorities unless @ssl_certificate_authorities.nil? - params['ssl_verification_mode'] = @ssl_verification_mode unless @ssl_verification_mode.nil? - end - - def infer_ssl_enabled_from_hosts - return if original_params.include?('ssl') || original_params.include?('ssl_enabled') - - @ssl_enabled = params['ssl_enabled'] = effectively_ssl? end def setup_hosts diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index d00115a5..38f4c51e 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '4.20.5' + s.version = '5.0.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index bddb6cf7..8e24af58 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -58,6 +58,19 @@ end end + describe 'handling obsolete settings' do + [{:name => 'ssl', :replacement => 'ssl_enabled', :sample_value => true}, + {:name => 'ca_file', :replacement => 'ssl_certificate_authorities', :sample_value => 'spec/fixtures/test_certs/ca.crt'}, + {:name => 'ssl_certificate_verification', :replacement => 'ssl_verification_mode', :sample_value => false }].each do | obsolete_setting| + context "with obsolete #{obsolete_setting[:name]}" do + let (:config) { {obsolete_setting[:name] => obsolete_setting[:sample_value]} } + it "should raise a config error with the appropriate message" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /The setting `#{obsolete_setting[:name]}` in plugin `elasticsearch` is obsolete and is no longer available. Set '#{obsolete_setting[:replacement]}' instead/i + end + end + end + end + context "against not authentic Elasticsearch" do before(:each) do Elasticsearch::Client.send(:define_method, :ping) { raise Elasticsearch::UnsupportedProductError.new("Fake error") } # define error ping method From 1fece9357bc10be4b0984bef0a3c065b45ac466f Mon Sep 17 00:00:00 2001 From: flexitrev Date: Fri, 5 Jul 2024 15:43:37 -0400 Subject: [PATCH 170/207] Adding support for custom headers Co-authored-by: Karen Metts <35154725+karenzone@users.noreply.github.com> --- docs/index.asciidoc | 39 ++++++++++++++++++---------- lib/logstash/inputs/elasticsearch.rb | 8 ++++-- spec/inputs/elasticsearch_spec.rb | 16 ++++++++++++ 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 04a50eaf..4f0827d3 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -23,7 +23,7 @@ include::{include_path}/plugin_header.asciidoc[] Read from an Elasticsearch cluster, based on search query results. This is useful for replaying test logs, reindexing, etc. 
-You can periodically schedule ingestion using a cron syntax +You can periodically schedule ingestion using a cron syntax (see `schedule` setting) or run the query one time to load data into Logstash. @@ -112,6 +112,7 @@ Please check out <> for details. | <> |<>|No | <> |<>|No | <> | <>|No +| <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -205,8 +206,18 @@ For more info, check out the The maximum amount of time, in seconds, to wait while establishing a connection to Elasticsearch. Connect timeouts tend to occur when Elasticsearch or an intermediate proxy is overloaded with requests and has exhausted its connection pool. +[id="plugins-{type}s-{plugin}-custom_headers"] +===== `custom_headers` + + * Value type is <> + * Default value is empty + +Pass a set of key value pairs as the headers sent in each request to an elasticsearch node. +The headers will be used for any kind of request. +These custom headers will be overridden by settings such as `compression_level`. + [id="plugins-{type}s-{plugin}-docinfo"] -===== `docinfo` +===== `docinfo` * Value type is <> * Default value is `false` @@ -257,7 +268,7 @@ Example [id="plugins-{type}s-{plugin}-docinfo_fields"] -===== `docinfo_fields` +===== `docinfo_fields` * Value type is <> * Default value is `["_index", "_type", "_id"]` @@ -268,7 +279,7 @@ option lists the metadata fields to save in the current event. See more information. [id="plugins-{type}s-{plugin}-docinfo_target"] -===== `docinfo_target` +===== `docinfo_target` * Value type is <> * Default value depends on whether <> is enabled: @@ -292,7 +303,7 @@ this option names the field under which to store the metadata fields as subfield Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)]. [id="plugins-{type}s-{plugin}-hosts"] -===== `hosts` +===== `hosts` * Value type is <> * There is no default value for this setting. @@ -302,18 +313,18 @@ can be either IP, HOST, IP:port, or HOST:port. The port defaults to 9200. [id="plugins-{type}s-{plugin}-index"] -===== `index` +===== `index` * Value type is <> * Default value is `"logstash-*"` -The index or alias to search. +The index or alias to search. Check out {ref}/api-conventions.html#api-multi-index[Multi Indices documentation] in the Elasticsearch documentation for info on referencing multiple indices. [id="plugins-{type}s-{plugin}-password"] -===== `password` +===== `password` * Value type is <> * There is no default value for this setting. @@ -333,7 +344,7 @@ An empty string is treated as if proxy was not set, this is useful when using environment variables e.g. `proxy => '${LS_PROXY:}'`. [id="plugins-{type}s-{plugin}-query"] -===== `query` +===== `query` * Value type is <> * Default value is `'{ "sort": [ "_doc" ] }'` @@ -381,7 +392,7 @@ The default is 0 (no retry). This value should be equal to or greater than zero. NOTE: Partial failures - such as errors in a subset of all slices - can result in the entire query being retried, which can lead to duplication of data. Avoiding this would require Logstash to store the entire result set of a query in memory which is often not possible. [id="plugins-{type}s-{plugin}-schedule"] -===== `schedule` +===== `schedule` * Value type is <> * There is no default value for this setting. @@ -393,7 +404,7 @@ There is no schedule by default. If no schedule is given, then the statement is exactly once. 
[id="plugins-{type}s-{plugin}-scroll"] -===== `scroll` +===== `scroll` * Value type is <> * Default value is `"1m"` @@ -416,7 +427,7 @@ The query requires at least one `sort` field, as described in the <> * Default value is `1000` @@ -606,7 +617,7 @@ It is also possible to target an entry in the event's metadata, which will be av [id="plugins-{type}s-{plugin}-user"] -===== `user` +===== `user` * Value type is <> * There is no default value for this setting. @@ -620,7 +631,7 @@ empty string authentication will be disabled. ==== Elasticsearch Input Obsolete Configuration Options WARNING: As of version `5.0.0` of this plugin, some configuration options have been replaced. -The plugin will fail to start if it contains any of these obsolete options. +The plugin will fail to start if it contains any of these obsolete options. [cols="<,<",options="header",] diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index cbf92220..bd7a25a0 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -24,9 +24,9 @@ # called `http.content_type.required`. If this option is set to `true`, and you # are using Logstash 2.4 through 5.2, you need to update the Elasticsearch input # plugin to version 4.0.2 or higher. -# +# # ================================================================================ -# +# # Read from an Elasticsearch cluster, based on search query results. # This is useful for replaying test logs, reindexing, etc. # It also supports periodically scheduling lookup enrichments @@ -166,6 +166,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/_document_metadata.html config :docinfo_fields, :validate => :array, :default => ['_index', '_type', '_id'] + # Custom headers for Elasticsearch requests + config :custom_headers, :validate => :hash, :default => {} + # Basic Auth - username config :user, :validate => :string @@ -298,6 +301,7 @@ def register transport_options[:headers].merge!(setup_basic_auth(user, password)) transport_options[:headers].merge!(setup_api_key(api_key)) transport_options[:headers].merge!({'user-agent' => prepare_user_agent()}) + transport_options[:headers].merge!(@custom_headers) if @custom_headers transport_options[:request_timeout] = @request_timeout_seconds unless @request_timeout_seconds.nil? transport_options[:connect_timeout] = @connect_timeout_seconds unless @connect_timeout_seconds.nil? transport_options[:socket_timeout] = @socket_timeout_seconds unless @socket_timeout_seconds.nil? 
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 8e24af58..a4f67515 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -116,6 +116,22 @@ expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including("x-elastic-product-origin"=>"logstash-input-elasticsearch") end end + + context "with custom headers" do + let(:config) do + { + "schedule" => "* * * * * UTC", + "custom_headers" => { "Custom-Header-1" => "Custom Value 1", "Custom-Header-2" => "Custom Value 2" } + } + end + + + it "sets custom headers" do + plugin.register + client = plugin.send(:client) + expect( extract_transport(client).options[:transport_options][:headers] ).to match hash_including(config["custom_headers"]) + end + end end context "retry" do From 3f6c6502cb35de72f1f26359c02428a4af072ac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Duarte?= Date: Tue, 17 Dec 2024 13:10:42 +0000 Subject: [PATCH 171/207] Update docs/index.asciidoc --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 4f0827d3..e23ac9dc 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -214,7 +214,7 @@ Connect timeouts tend to occur when Elasticsearch or an intermediate proxy is ov Pass a set of key value pairs as the headers sent in each request to an elasticsearch node. The headers will be used for any kind of request. -These custom headers will be overridden by settings such as `compression_level`. +These custom headers will override any headers previously set by the plugin such as the User Agent or Authorization headers. [id="plugins-{type}s-{plugin}-docinfo"] ===== `docinfo` From 6ef0ffcaf2f8298bdbc8a5a028c4c603bb4b1a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Duarte?= Date: Tue, 17 Dec 2024 13:11:21 +0000 Subject: [PATCH 172/207] Update lib/logstash/inputs/elasticsearch.rb --- lib/logstash/inputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index bd7a25a0..5c0e700d 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -301,7 +301,7 @@ def register transport_options[:headers].merge!(setup_basic_auth(user, password)) transport_options[:headers].merge!(setup_api_key(api_key)) transport_options[:headers].merge!({'user-agent' => prepare_user_agent()}) - transport_options[:headers].merge!(@custom_headers) if @custom_headers + transport_options[:headers].merge!(@custom_headers) unless @custom_headers.empty? transport_options[:request_timeout] = @request_timeout_seconds unless @request_timeout_seconds.nil? transport_options[:connect_timeout] = @connect_timeout_seconds unless @connect_timeout_seconds.nil? transport_options[:socket_timeout] = @socket_timeout_seconds unless @socket_timeout_seconds.nil? 
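A minimal pipeline exercising the `custom_headers` option introduced in the three patches above might look like the following sketch; the header name and value here are illustrative placeholders, not defaults shipped by the plugin:

input {
  elasticsearch {
    hosts => ["localhost:9200"]
    query => '{ "sort": [ "_doc" ] }'
    # hypothetical header identifying replay traffic; any key/value pairs are accepted
    custom_headers => { "X-Request-Source" => "logstash-replay" }
  }
}

Because `@custom_headers` is merged into the transport options after the plugin's own headers (see the `register` hunk above), any header supplied here overrides ones the plugin sets itself, such as `user-agent` or `Authorization`.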
From 675f759e7ae5b40a508acab58d3d4e71b1596735 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Duarte?= 
Date: Wed, 18 Dec 2024 12:30:00 +0000
Subject: [PATCH 173/207] [skip ci] Update CHANGELOG.md to include entry for
 #207

---
CHANGELOG.md | 1 +
1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e70053a8..cc7a7801 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
- `ca_file`, which should be replaced by `ssl_certificate_authorities`
- `ssl_certificate_verification`, which should be replaced by `ssl_verification_mode`
- [#213](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/213)
+ - Add support for custom headers [#207](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/207)

## 4.20.5
- Add `x-elastic-product-origin` header to Elasticsearch requests [#211](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/211)

From ddc575af60333c46aefa5b80c9a3805ab964b089 Mon Sep 17 00:00:00 2001
From: Ry Biesemeyer 
Date: Wed, 8 Jan 2025 07:26:47 -0800
Subject: [PATCH 174/207] Tolerate failure (#183) (#218)

* Tolerate failure (#183)

* test setup: ensure presence of /etc/protocols

* test setup: actually run secure_integration tests

When SECURE_INTEGRATION is specified, the (non-secure) `:integration` specs are excluded, so we cannot have the `:secure_integration` specs wrapped in a context flagged as `:integration`.

* test setup: regenerate test certs (and add regen script)

* test setup: give ES the full cert chain

In order for the `ca_trusted_fingerprint` specs to work with the CA's fingerprint, ES needs to be configured to present a cert chain that includes the CA.

* resilience: prevent failures from crashing plugin

When an Event cannot be created directly from the hit, or when the docinfo cannot be merged into a non-hash field in the hit, emit an Event tagged with `_elasticsearch_input_failure` that contains the JSON-encoded hit in `[event][original]` instead of crashing.
* add link to changelog * remove orphan method from refactor * add link to PR in CHANGELOG.md --- .ci/Dockerfile.elasticsearch | 2 +- .ci/setup.sh | 11 ++ CHANGELOG.md | 4 + docs/index.asciidoc | 10 ++ lib/logstash/inputs/elasticsearch.rb | 20 +++- logstash-input-elasticsearch.gemspec | 2 +- spec/fixtures/test_certs/GENERATED_AT | 1 + spec/fixtures/test_certs/ca.crt | 35 +++--- spec/fixtures/test_certs/ca.der.sha256 | 2 +- spec/fixtures/test_certs/es.chain.crt | 38 +++++++ spec/fixtures/test_certs/es.crt | 35 +++--- spec/fixtures/test_certs/renew.sh | 15 +++ spec/inputs/elasticsearch_spec.rb | 105 +++++++++++++++++- spec/inputs/integration/elasticsearch_spec.rb | 2 +- 14 files changed, 233 insertions(+), 49 deletions(-) create mode 100755 .ci/setup.sh create mode 100644 spec/fixtures/test_certs/GENERATED_AT create mode 100644 spec/fixtures/test_certs/es.chain.crt create mode 100755 spec/fixtures/test_certs/renew.sh diff --git a/.ci/Dockerfile.elasticsearch b/.ci/Dockerfile.elasticsearch index 8ff2b75a..fb9f58cf 100644 --- a/.ci/Dockerfile.elasticsearch +++ b/.ci/Dockerfile.elasticsearch @@ -13,7 +13,7 @@ COPY --chown=elasticsearch:elasticsearch spec/fixtures/test_certs/* $es_path/con RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.enabled: true" >> $es_yml; fi RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.key: $es_path/config/test_certs/es.key" >> $es_yml; fi -RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/es.crt" >> $es_yml; fi +RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate: $es_path/config/test_certs/es.chain.crt" >> $es_yml; fi RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.certificate_authorities: [ '$es_path/config/test_certs/ca.crt' ]" >> $es_yml; fi RUN if [ "$SECURE_INTEGRATION" = "true" ] ; then echo "xpack.security.http.ssl.verification_mode: certificate" >> $es_yml; fi RUN if [ "$SECURE_INTEGRATION" = "true" ] && [ -n "$ES_SSL_SUPPORTED_PROTOCOLS" ] ; then echo "xpack.security.http.ssl.supported_protocols: ${ES_SSL_SUPPORTED_PROTOCOLS}" >> $es_yml; fi diff --git a/.ci/setup.sh b/.ci/setup.sh new file mode 100755 index 00000000..54a42db2 --- /dev/null +++ b/.ci/setup.sh @@ -0,0 +1,11 @@ +# user_agent requires /etc/protocols, which is provided by netbase. +# https://github.com/jruby/jruby/issues/3955 +if [ ! -f "/etc/protocols" ]; then + if [ $(command -v apt-get) ]; then + echo "installing netbase with apt-get" + sudo apt-get install -y netbase + else + echo "installing netbase with yum" + sudo yum install -y netbase + fi +fi \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index cc7a7801..00c0b684 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 5.0.1 + - Fix: prevent plugin crash when hits contain illegal structure [#218](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/218) + - When a hit cannot be converted to an event, the input now emits an event tagged with `_elasticsearch_input_failure` with an `[event][original]` containing a JSON-encoded string representation of the entire hit. + ## 5.0.0 - SSL settings that were marked deprecated in version `4.17.0` are now marked obsolete, and will prevent the plugin from starting. 
- These settings are: diff --git a/docs/index.asciidoc b/docs/index.asciidoc index e23ac9dc..40f2df10 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -93,6 +93,16 @@ The plugin logs a warning when ECS is enabled and `target` isn't set. TIP: Set the `target` option to avoid potential schema conflicts. +[id="plugins-{type}s-{plugin}-failure-handling"] +==== Failure handling + +When this input plugin cannot create a structured `Event` from a hit result, it will instead create an `Event` that is tagged with `_elasticsearch_input_failure` whose `[event][original]` is a JSON-encoded string representation of the entire hit. + +Common causes are: + + - When the hit result contains top-level fields that are {logstash-ref}/processing.html#reserved-fields[reserved in Logstash] but do not have the expected shape. Use the <> directive to avoid conflicts with the top-level namespace. + - When <> is enabled and the docinfo fields cannot be merged into the hit result. Combine <> and <> to avoid conflict. + [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input configuration options diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 5c0e700d..dd1f047f 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -346,21 +346,29 @@ def run(output_queue) # This can be called externally from the query_executor public def push_hit(hit, output_queue, root_field = '_source') - event = targeted_event_factory.new_event hit[root_field] - set_docinfo_fields(hit, event) if @docinfo + event = event_from_hit(hit, root_field) decorate(event) output_queue << event end + def event_from_hit(hit, root_field) + event = targeted_event_factory.new_event hit[root_field] + set_docinfo_fields(hit, event) if @docinfo + + event + rescue => e + serialized_hit = hit.to_json + logger.warn("Event creation error, original data now in [event][original] field", message: e.message, exception: e.class, data: serialized_hit) + return event_factory.new_event('event' => { 'original' => serialized_hit }, 'tags' => ['_elasticsearch_input_failure']) + end + def set_docinfo_fields(hit, event) # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. docinfo_target = event.get(@docinfo_target) || {} unless docinfo_target.is_a?(Hash) - @logger.error("Incompatible Event, incompatible type for the docinfo_target=#{@docinfo_target} field in the `_source` document, expected a hash got:", :docinfo_target_type => docinfo_target.class, :event => event.to_hash_with_metadata) - - # TODO: (colin) I am not sure raising is a good strategy here? 
- raise Exception.new("Elasticsearch input: incompatible event") + # expect error to be handled by `#event_from_hit` + fail RuntimeError, "Incompatible event; unable to merge docinfo fields into docinfo_target=`#{@docinfo_target}`" end @docinfo_fields.each do |field| diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 38f4c51e..5a08eea6 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '5.0.0' + s.version = '5.0.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/fixtures/test_certs/GENERATED_AT b/spec/fixtures/test_certs/GENERATED_AT new file mode 100644 index 00000000..8bfc7041 --- /dev/null +++ b/spec/fixtures/test_certs/GENERATED_AT @@ -0,0 +1 @@ +2024-12-26T22:27:15+00:00 diff --git a/spec/fixtures/test_certs/ca.crt b/spec/fixtures/test_certs/ca.crt index e9312d7c..87e96b75 100644 --- a/spec/fixtures/test_certs/ca.crt +++ b/spec/fixtures/test_certs/ca.crt @@ -1,20 +1,19 @@ -----BEGIN CERTIFICATE----- -MIIDSTCCAjGgAwIBAgIUUcAg9c8B8jiliCkOEJyqoAHrmccwDQYJKoZIhvcNAQEL -BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l -cmF0ZWQgQ0EwHhcNMjEwODEyMDUxNDU1WhcNMjQwODExMDUxNDU1WjA0MTIwMAYD -VQQDEylFbGFzdGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTCC -ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAK1HuusRuGNsztd4EQvqwcMr -8XvnNNaalerpMOorCGySEFrNf0HxDIVMGMCrOv1F8SvlcGq3XANs2MJ4F2xhhLZr -PpqVHx+QnSZ66lu5R89QVSuMh/dCMxhNBlOA/dDlvy+EJBl9H791UGy/ChhSgaBd -OKVyGkhjErRTeMIq7rR7UG6GL/fV+JGy41UiLrm1KQP7/XVD9UzZfGq/hylFkTPe -oox5BUxdxUdDZ2creOID+agtIYuJVIkelKPQ+ljBY3kWBRexqJQsvyNUs1gZpjpz -YUCzuVcXDRuJXYQXGqWXhsBPfJv+ZcSyMIBUfWT/G13cWU1iwufPy0NjajowPZsC -AwEAAaNTMFEwHQYDVR0OBBYEFMgkye5+2l+TE0I6RsXRHjGBwpBGMB8GA1UdIwQY -MBaAFMgkye5+2l+TE0I6RsXRHjGBwpBGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI -hvcNAQELBQADggEBAIgtJW8sy5lBpzPRHkmWSS/SCZIPsABW+cHqQ3e0udrI3CLB -G9n7yqAPWOBTbdqC2GM8dvAS/Twx4Bub/lWr84dFCu+t0mQq4l5kpJMVRS0KKXPL -DwJbUN3oPNYy4uPn5Xi+XY3BYFce5vwJUsqIxeAbIOxVTNx++k5DFnB0ESAM23QL -sgUZl7xl3/DkdO4oHj30gmTRW9bjCJ6umnHIiO3JoJatrprurUIt80vHC4Ndft36 -NBQ9mZpequ4RYjpSZNLcVsxyFAYwEY4g8MvH0MoMo2RRLfehmMCzXnI/Wh2qEyYz -emHprBii/5y1HieKXlX9CZRb5qEPHckDVXW3znw= +MIIDFTCCAf2gAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMDQxMjAwBgNVBAMTKUVsYXN0aWMgQ2VydGlm +aWNhdGUgVG9vbCBBdXRvZ2VuZXJhdGVkIENBMIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEArUe66xG4Y2zO13gRC+rBwyvxe+c01pqV6ukw6isIbJIQWs1/ +QfEMhUwYwKs6/UXxK+VwardcA2zYwngXbGGEtms+mpUfH5CdJnrqW7lHz1BVK4yH +90IzGE0GU4D90OW/L4QkGX0fv3VQbL8KGFKBoF04pXIaSGMStFN4wirutHtQboYv +99X4kbLjVSIuubUpA/v9dUP1TNl8ar+HKUWRM96ijHkFTF3FR0NnZyt44gP5qC0h +i4lUiR6Uo9D6WMFjeRYFF7GolCy/I1SzWBmmOnNhQLO5VxcNG4ldhBcapZeGwE98 +m/5lxLIwgFR9ZP8bXdxZTWLC58/LQ2NqOjA9mwIDAQABozIwMDAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBTIJMnuftpfkxNCOkbF0R4xgcKQRjANBgkqhkiG9w0B +AQsFAAOCAQEAhfg/cmXc4Uh90yiXU8jOW8saQjTsq4ZMDQiLfJsNmNNYmHFN0vhv +lJRI1STdy7+GpjS5QbrMjQIxWSS8X8xysE4Rt81IrWmLuao35TRFyoiE1seBQ5sz +p/BxZUe57JvWi9dyzv2df4UfWFdGBhzdr80odZmz4i5VIv6qCKJKsGikcuLpepmp +E/UKnKHeR/dFWsxzA9P2OzHTUNBMOOA2PyAUL49pwoChwJeOWN/zAgwMWLbuHFG0 
+IN0u8swAmeH98QdvzbhiOatGNpqfTNvQEDc19yVjfXKpBVZQ79WtronYSqrbrUa1 +T2zD8bIVP7CdddD/UmpT1SSKh4PJxudy5Q== -----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/ca.der.sha256 b/spec/fixtures/test_certs/ca.der.sha256 index 2f04c231..a2d2cd59 100644 --- a/spec/fixtures/test_certs/ca.der.sha256 +++ b/spec/fixtures/test_certs/ca.der.sha256 @@ -1 +1 @@ -195a7e7b1bc29f3d7913a918a44721704d27fa56facea0cd72a8093c7107c283 +b1e955819b0d14f64f863adb103c248ddacf2e17bea48d04ee4b57c64814ccc4 diff --git a/spec/fixtures/test_certs/es.chain.crt b/spec/fixtures/test_certs/es.chain.crt new file mode 100644 index 00000000..334de63d --- /dev/null +++ b/spec/fixtures/test_certs/es.chain.crt @@ -0,0 +1,38 @@ +-----BEGIN CERTIFICATE----- +MIIDIzCCAgugAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMA0xCzAJBgNVBAMTAmVzMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEArZLZvLSWDK7Ul+AaBnjU81dsfaow8zOjCC5V +V21nXpYzQJoQbuWcvGYxwL7ZDs2ca4Wc8BVCj1NDduHuP7U+QIlUdQpl8kh5a0Zz +36pcFw7UyF51/AzWixJrht/Azzkb5cpZtE22ZK0KhS4oCsjJmTN0EABAsGhDI9/c +MjNrUC7iP0dvfOuzAPp7ufY83h98jKKXUYV24snbbvmqoWI6GQQNSG/sEo1+1UGH +/z07/mVKoBAa5DVoNGvxN0fCE7vW7hkhT8+frJcsYFatAbnf6ql0KzEa8lN9u0gR +hQNM3zcKKsjEMomBzVBc4SV3KXO0d/jGdDtlqsm2oXqlTMdtGwIDAQABo2cwZTAY +BgNVHREEETAPgg1lbGFzdGljc2VhcmNoMAkGA1UdEwQCMAAwHQYDVR0OBBYEFFQU +K+6Cg2kExRj1xSDzEi4kkgKXMB8GA1UdIwQYMBaAFMgkye5+2l+TE0I6RsXRHjGB +wpBGMA0GCSqGSIb3DQEBCwUAA4IBAQB6cZ7IrDzcAoOZgAt9RlOe2yzQeH+alttp +CSQVINjJotS1WvmtqjBB6ArqLpXIGU89TZsktNe/NQJzgYSaMnlIuHVLFdxJYmwU +T1cP6VC/brmqP/dd5y7VWE7Lp+Wd5CxKl/WY+9chmgc+a1fW/lnPEJJ6pca1Bo8b +byIL0yY2IUv4R2eh1IyQl9oGH1GOPLgO7cY04eajxYcOVA2eDSItoyDtrJfkFP/P +UXtC1JAkvWKuujFEiBj0AannhroWlp3gvChhBwCuCAU0KXD6g8BE8tn6oT1+FW7J +avSfHxAe+VHtYhF8sJ8jrdm0d7E4GKS9UR/pkLAL1JuRdJ1VkPx3 +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIDFTCCAf2gAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMDQxMjAwBgNVBAMTKUVsYXN0aWMgQ2VydGlm +aWNhdGUgVG9vbCBBdXRvZ2VuZXJhdGVkIENBMIIBIjANBgkqhkiG9w0BAQEFAAOC +AQ8AMIIBCgKCAQEArUe66xG4Y2zO13gRC+rBwyvxe+c01pqV6ukw6isIbJIQWs1/ +QfEMhUwYwKs6/UXxK+VwardcA2zYwngXbGGEtms+mpUfH5CdJnrqW7lHz1BVK4yH +90IzGE0GU4D90OW/L4QkGX0fv3VQbL8KGFKBoF04pXIaSGMStFN4wirutHtQboYv +99X4kbLjVSIuubUpA/v9dUP1TNl8ar+HKUWRM96ijHkFTF3FR0NnZyt44gP5qC0h +i4lUiR6Uo9D6WMFjeRYFF7GolCy/I1SzWBmmOnNhQLO5VxcNG4ldhBcapZeGwE98 +m/5lxLIwgFR9ZP8bXdxZTWLC58/LQ2NqOjA9mwIDAQABozIwMDAPBgNVHRMBAf8E +BTADAQH/MB0GA1UdDgQWBBTIJMnuftpfkxNCOkbF0R4xgcKQRjANBgkqhkiG9w0B +AQsFAAOCAQEAhfg/cmXc4Uh90yiXU8jOW8saQjTsq4ZMDQiLfJsNmNNYmHFN0vhv +lJRI1STdy7+GpjS5QbrMjQIxWSS8X8xysE4Rt81IrWmLuao35TRFyoiE1seBQ5sz +p/BxZUe57JvWi9dyzv2df4UfWFdGBhzdr80odZmz4i5VIv6qCKJKsGikcuLpepmp +E/UKnKHeR/dFWsxzA9P2OzHTUNBMOOA2PyAUL49pwoChwJeOWN/zAgwMWLbuHFG0 +IN0u8swAmeH98QdvzbhiOatGNpqfTNvQEDc19yVjfXKpBVZQ79WtronYSqrbrUa1 +T2zD8bIVP7CdddD/UmpT1SSKh4PJxudy5Q== +-----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/es.crt b/spec/fixtures/test_certs/es.crt index 228dccd3..7676eace 100644 --- a/spec/fixtures/test_certs/es.crt +++ b/spec/fixtures/test_certs/es.crt @@ -1,20 +1,19 @@ -----BEGIN CERTIFICATE----- -MIIDNjCCAh6gAwIBAgIUF9wE+oqGSbm4UVn1y9gEjzyaJFswDQYJKoZIhvcNAQEL -BQAwNDEyMDAGA1UEAxMpRWxhc3RpYyBDZXJ0aWZpY2F0ZSBUb29sIEF1dG9nZW5l -cmF0ZWQgQ0EwHhcNMjEwODEyMDUxNTI3WhcNMjQwODExMDUxNTI3WjANMQswCQYD -VQQDEwJlczCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAK2S2by0lgyu 
-1JfgGgZ41PNXbH2qMPMzowguVVdtZ16WM0CaEG7lnLxmMcC+2Q7NnGuFnPAVQo9T -Q3bh7j+1PkCJVHUKZfJIeWtGc9+qXBcO1MhedfwM1osSa4bfwM85G+XKWbRNtmSt -CoUuKArIyZkzdBAAQLBoQyPf3DIza1Au4j9Hb3zrswD6e7n2PN4ffIyil1GFduLJ -2275qqFiOhkEDUhv7BKNftVBh/89O/5lSqAQGuQ1aDRr8TdHwhO71u4ZIU/Pn6yX -LGBWrQG53+qpdCsxGvJTfbtIEYUDTN83CirIxDKJgc1QXOEldylztHf4xnQ7ZarJ -tqF6pUzHbRsCAwEAAaNnMGUwHQYDVR0OBBYEFFQUK+6Cg2kExRj1xSDzEi4kkgKX -MB8GA1UdIwQYMBaAFMgkye5+2l+TE0I6RsXRHjGBwpBGMBgGA1UdEQQRMA+CDWVs -YXN0aWNzZWFyY2gwCQYDVR0TBAIwADANBgkqhkiG9w0BAQsFAAOCAQEAinaknZIc -7xtQNwUwa+kdET+I4lMz+TJw9vTjGKPJqe082n81ycKU5b+a/OndG90z+dTwhShW -f0oZdIe/1rDCdiRU4ceCZA4ybKrFDIbW8gOKZOx9rsgEx9XNELj4ocZTBqxjQmNE -Ho91fli5aEm0EL2vJgejh4hcfDeElQ6go9gtvAHQ57XEADQSenvt69jOICOupnS+ -LSjDVhv/VLi3CAip0B+lD5fX/DVQdrJ62eRGuQYxoouE3saCO58qUUrKB39yD9KA -qRA/sVxyLogxaU+5dLfc0NJdOqSzStxQ2vdMvAWo9tZZ2UBGFrk5SdwCQe7Yv5mX -qi02i4q6meHGcw== +MIIDIzCCAgugAwIBAgIBATANBgkqhkiG9w0BAQsFADA0MTIwMAYDVQQDEylFbGFz +dGljIENlcnRpZmljYXRlIFRvb2wgQXV0b2dlbmVyYXRlZCBDQTAeFw0yNDEyMjYy +MjI3MTVaFw0yNTEyMjYyMjI3MTVaMA0xCzAJBgNVBAMTAmVzMIIBIjANBgkqhkiG +9w0BAQEFAAOCAQ8AMIIBCgKCAQEArZLZvLSWDK7Ul+AaBnjU81dsfaow8zOjCC5V +V21nXpYzQJoQbuWcvGYxwL7ZDs2ca4Wc8BVCj1NDduHuP7U+QIlUdQpl8kh5a0Zz +36pcFw7UyF51/AzWixJrht/Azzkb5cpZtE22ZK0KhS4oCsjJmTN0EABAsGhDI9/c +MjNrUC7iP0dvfOuzAPp7ufY83h98jKKXUYV24snbbvmqoWI6GQQNSG/sEo1+1UGH +/z07/mVKoBAa5DVoNGvxN0fCE7vW7hkhT8+frJcsYFatAbnf6ql0KzEa8lN9u0gR +hQNM3zcKKsjEMomBzVBc4SV3KXO0d/jGdDtlqsm2oXqlTMdtGwIDAQABo2cwZTAY +BgNVHREEETAPgg1lbGFzdGljc2VhcmNoMAkGA1UdEwQCMAAwHQYDVR0OBBYEFFQU +K+6Cg2kExRj1xSDzEi4kkgKXMB8GA1UdIwQYMBaAFMgkye5+2l+TE0I6RsXRHjGB +wpBGMA0GCSqGSIb3DQEBCwUAA4IBAQB6cZ7IrDzcAoOZgAt9RlOe2yzQeH+alttp +CSQVINjJotS1WvmtqjBB6ArqLpXIGU89TZsktNe/NQJzgYSaMnlIuHVLFdxJYmwU +T1cP6VC/brmqP/dd5y7VWE7Lp+Wd5CxKl/WY+9chmgc+a1fW/lnPEJJ6pca1Bo8b +byIL0yY2IUv4R2eh1IyQl9oGH1GOPLgO7cY04eajxYcOVA2eDSItoyDtrJfkFP/P +UXtC1JAkvWKuujFEiBj0AannhroWlp3gvChhBwCuCAU0KXD6g8BE8tn6oT1+FW7J +avSfHxAe+VHtYhF8sJ8jrdm0d7E4GKS9UR/pkLAL1JuRdJ1VkPx3 -----END CERTIFICATE----- diff --git a/spec/fixtures/test_certs/renew.sh b/spec/fixtures/test_certs/renew.sh new file mode 100755 index 00000000..8ef56cc9 --- /dev/null +++ b/spec/fixtures/test_certs/renew.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -e +cd "$(dirname "$0")" + +openssl x509 -x509toreq -in ca.crt -copy_extensions copyall -signkey ca.key -out ca.csr +openssl x509 -req -copy_extensions copyall -days 365 -in ca.csr -set_serial 0x01 -signkey ca.key -out ca.crt && rm ca.csr +openssl x509 -in ca.crt -outform der | sha256sum | awk '{print $1}' > ca.der.sha256 + +openssl x509 -x509toreq -in es.crt -copy_extensions copyall -signkey es.key -out es.csr +openssl x509 -req -copy_extensions copyall -days 365 -in es.csr -set_serial 0x01 -CA ca.crt -CAkey ca.key -out es.crt && rm es.csr +cat es.crt ca.crt > es.chain.crt + +# output ISO8601 timestamp to file +date -Iseconds > GENERATED_AT \ No newline at end of file diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index a4f67515..f64159ed 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -666,11 +666,28 @@ def synchronize_method!(object, method_name) context 'if the `docinfo_target` exist but is not of type hash' do let(:config) { base_config.merge 'docinfo' => true, "docinfo_target" => 'metadata_with_string' } let(:do_register) { false } + let(:mock_queue) { double('Queue', :<< => nil) } + let(:hit) { response.dig('hits', 'hits').first } + + it 'emits a tagged event with JSON-serialized event in [event][original]' do 
+ allow(plugin).to receive(:logger).and_return(double('Logger').as_null_object) - it 'raises an exception if the `docinfo_target` exist but is not of type hash' do - expect(client).not_to receive(:clear_scroll) plugin.register - expect { plugin.run([]) }.to raise_error(Exception, /incompatible event/) + plugin.run(mock_queue) + + expect(mock_queue).to have_received(:<<) do |event| + expect(event).to be_a_kind_of LogStash::Event + + expect(event.get('tags')).to include("_elasticsearch_input_failure") + expect(event.get('[event][original]')).to be_a_kind_of String + expect(JSON.load(event.get('[event][original]'))).to eq hit + end + + expect(plugin.logger) + .to have_received(:warn).with( + a_string_including("Event creation error, original data now in [event][original] field"), + a_hash_including(:message => a_string_including('unable to merge docinfo fields into docinfo_target=`metadata_with_string`'), + :data => a_string_including('"_id":"C5b2xLQwTZa76jBmHIbwHQ"'))) end end @@ -1248,6 +1265,88 @@ def wait_receive_request end end + context '#push_hit' do + let(:config) do + { + 'docinfo' => true, # include ids + 'docinfo_target' => '[@metadata][docinfo]' + } + end + + let(:hit) do + JSON.load(<<~EOJSON) + { + "_index" : "test_bulk_index_2", + "_type" : "_doc", + "_id" : "sHe6A3wBesqF7ydicQvG", + "_score" : 1.0, + "_source" : { + "@timestamp" : "2021-09-20T15:02:02.557Z", + "message" : "ping", + "@version" : "17", + "sequence" : 7, + "host" : { + "name" : "maybe.local", + "ip" : "127.0.0.1" + } + } + } + EOJSON + end + + let(:mock_queue) { double('queue', :<< => nil) } + + it 'pushes a generated event to the queue' do + plugin.send(:push_hit, hit, mock_queue) + expect(mock_queue).to have_received(:<<) do |event| + expect(event).to be_a_kind_of LogStash::Event + + # fields overriding defaults + expect(event.timestamp.to_s).to eq("2021-09-20T15:02:02.557Z") + expect(event.get('@version')).to eq("17") + + # structure from hit's _source + expect(event.get('message')).to eq("ping") + expect(event.get('sequence')).to eq(7) + expect(event.get('[host][name]')).to eq("maybe.local") + expect(event.get('[host][ip]')).to eq("127.0.0.1") + + # docinfo fields + expect(event.get('[@metadata][docinfo][_index]')).to eq("test_bulk_index_2") + expect(event.get('[@metadata][docinfo][_type]')).to eq("_doc") + expect(event.get('[@metadata][docinfo][_id]')).to eq("sHe6A3wBesqF7ydicQvG") + end + end + + context 'when event creation fails' do + before(:each) do + allow(plugin).to receive(:logger).and_return(double('Logger').as_null_object) + + allow(plugin.event_factory).to receive(:new_event).and_call_original + allow(plugin.event_factory).to receive(:new_event).with(a_hash_including hit['_source']).and_raise(RuntimeError, 'intentional') + end + + it 'pushes a tagged event containing a JSON-encoded hit in [event][original]' do + plugin.send(:push_hit, hit, mock_queue) + + expect(mock_queue).to have_received(:<<) do |event| + expect(event).to be_a_kind_of LogStash::Event + + expect(event.get('tags')).to include("_elasticsearch_input_failure") + expect(event.get('[event][original]')).to be_a_kind_of String + expect(JSON.load(event.get('[event][original]'))).to eq hit + end + + expect(plugin.logger) + .to have_received(:warn).with( + a_string_including("Event creation error, original data now in [event][original] field"), + a_hash_including(:message => a_string_including('intentional'), + :data => a_string_including('"_id":"sHe6A3wBesqF7ydicQvG"'))) + + end + end + end + # @note can be removed once we depends on 
elasticsearch gem >= 6.x
   def extract_transport(client) # on 7.x client.transport is a ES::Transport::Client
     client.transport.respond_to?(:transport) ? client.transport.transport : client.transport
diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb
index 63301900..2e153495 100644
--- a/spec/inputs/integration/elasticsearch_spec.rb
+++ b/spec/inputs/integration/elasticsearch_spec.rb
@@ -4,7 +4,7 @@
 require "logstash/inputs/elasticsearch"
 require_relative "../../../spec/es_helper"
 
-describe LogStash::Inputs::Elasticsearch, :integration => true do
+describe LogStash::Inputs::Elasticsearch do
 
   SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true'

From 5f6e8da449eebae035569b878ea2bbc822905f26 Mon Sep 17 00:00:00 2001
From: Karen Metts <35154725+karenzone@users.noreply.github.com>
Date: Thu, 23 Jan 2025 10:49:49 -0500
Subject: [PATCH 175/207] Doc: Remove duplicate note (#219)

---
 docs/index.asciidoc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 40f2df10..f17c92e3 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -111,9 +111,6 @@ This plugin supports these configuration options plus the <>
 for details.
 
-NOTE: As of version `5.0.0` of this plugin, a number of previously deprecated settings related to SSL have been removed.
-Please check out <> for details.
-
 [cols="<,<,<",options="header",]
 |=======================================================================
 |Setting |Input type|Required

From 4cecce6a703e753adc26b3bdbc1c66ae0e8f7caa Mon Sep 17 00:00:00 2001
From: Mashhur
Date: Mon, 3 Mar 2025 16:41:52 -0800
Subject: [PATCH 176/207] Add elastic-transport client support, opening a way
 for LS-core to start using the newer ES Ruby client.

---
 CHANGELOG.md                         |  3 +++
 lib/logstash/inputs/elasticsearch.rb | 15 +++++++++++----
 logstash-input-elasticsearch.gemspec |  4 ++--
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 00c0b684..008f5232 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 5.0.2
+  - Adds elastic-transport support [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223)
+
 ## 5.0.1
   - Fix: prevent plugin crash when hits contain illegal structure [#218](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/218)
   - When a hit cannot be converted to an event, the input now emits an event tagged with `_elasticsearch_input_failure` with an `[event][original]` containing a JSON-encoded string representation of the entire hit.
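
In outline, the change below selects the Manticore-backed transport class from
whichever transport gem layout is present. This sketch merely restates the diff
that follows and adds no behavior of its own:

[source,ruby]
    # Prefer the legacy elasticsearch-transport layout; when it is absent,
    # fall back to elastic-transport (the layout used by elasticsearch-ruby 8.x).
    def get_transport_client_class
      require "elasticsearch/transport/transport/http/manticore"
      ::Elasticsearch::Transport::Transport::HTTP::Manticore
    rescue ::LoadError
      require "elastic/transport/transport/http/manticore"
      ::Elastic::Transport::Transport::HTTP::Manticore
    end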
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index dd1f047f..2d14051a 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -13,9 +13,6 @@ require "base64" require "elasticsearch" -require "elasticsearch/transport/transport/http/manticore" -require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" -require_relative "elasticsearch/patches/_elasticsearch_transport_connections_selector" # .Compatibility Note # [NOTE] @@ -316,7 +313,7 @@ def register @client_options = { :hosts => hosts, :transport_options => transport_options, - :transport_class => ::Elasticsearch::Transport::Transport::HTTP::Manticore, + :transport_class => get_transport_client_class, :ssl => ssl_options } @@ -642,6 +639,16 @@ def setup_query_executor end end + def get_transport_client_class + require "elasticsearch/transport/transport/http/manticore" + require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" + require_relative "elasticsearch/patches/_elasticsearch_transport_connections_selector" + ::Elasticsearch::Transport::Transport::HTTP::Manticore + rescue ::LoadError + require "elastic/transport/transport/http/manticore" + ::Elastic::Transport::Transport::HTTP::Manticore + end + module URIOrEmptyValidator ## # @override to provide :uri_or_empty validator diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 5a08eea6..20cb76ca 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,13 +1,13 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '5.0.1' + s.version = '5.0.2' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" s.authors = ["Elastic"] s.email = 'info@elastic.co' - s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" + s.homepage = "https://elastic.co/logstash" s.require_paths = ["lib"] # Files From dfaf929029327ff6d5dcb30089e0a97c0bfa237b Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 3 Mar 2025 22:34:59 -0800 Subject: [PATCH 177/207] Import manticore to resolve uninitialized Manticore error. 
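
Illustrative context (not part of the diff itself): without an explicit
require, the transport class can reference the `Manticore` constant before any
other component has loaded the gem, which raises a NameError. Loading the gem
up front removes the load-order dependency:

[source,ruby]
    begin
      Manticore                # referenced before the gem has been loaded
    rescue NameError => e
      puts e.message           # => uninitialized constant Manticore
    end

    require "manticore"        # the fix: load the gem explicitly
    Manticore                  # now resolves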
---
 CHANGELOG.md                         | 2 +-
 lib/logstash/inputs/elasticsearch.rb | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 008f5232..f10750cd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,5 @@
 ## 5.0.2
-  - Adds elastic-transport support [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223)
+  - Add elastic-transport client support [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223)
 
 ## 5.0.1
   - Fix: prevent plugin crash when hits contain illegal structure [#218](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/218)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index 2d14051a..e1d97060 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -13,6 +13,7 @@
 require "base64"
 
 require "elasticsearch"
+require "manticore"
 
 # .Compatibility Note
 # [NOTE]

From 52badf6badd19e1b647244f0ef094d8904fba5ea Mon Sep 17 00:00:00 2001
From: Mashhur
Date: Tue, 11 Mar 2025 11:26:01 -0700
Subject: [PATCH 178/207] Pin the elasticsearch gem below the 9.x series. Add
 comments on the logic that supports both the elasticsearch-transport and
 elastic-transport gems.

---
 lib/logstash/inputs/elasticsearch.rb | 4 ++++
 logstash-input-elasticsearch.gemspec | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index e1d97060..c9d8b552 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -641,6 +641,10 @@ def setup_query_executor
   end
 
   def get_transport_client_class
+    # LS-core includes `elasticsearch` gem. The gem is composed of two separate gems: `elasticsearch-api` and `elasticsearch-transport`
+    # And now `elasticsearch-transport` is old, instead we have `elastic-transport`.
+    # LS-core updated `elasticsearch` > 8: https://github.com/elastic/logstash/pull/17161
+    # The following bits keep compatibility with both the `elasticsearch-transport` and `elastic-transport` gems
     require "elasticsearch/transport/transport/http/manticore"
     require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore"
     require_relative "elasticsearch/patches/_elasticsearch_transport_connections_selector"

diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec
index 20cb76ca..8e36201b 100644
--- a/logstash-input-elasticsearch.gemspec
+++ b/logstash-input-elasticsearch.gemspec
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency "logstash-mixin-validator_support", '~> 1.0'
   s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
 
-  s.add_runtime_dependency 'elasticsearch', '>= 7.17.9'
+  s.add_runtime_dependency 'elasticsearch', '>= 7.17.9', '< 9'
 
   s.add_runtime_dependency 'logstash-mixin-ca_trusted_fingerprint_support', '~> 1.0'
   s.add_runtime_dependency 'logstash-mixin-normalize_config_support', '~>1.0'

From be39bd34a9b776e4c19aabf4b4193ee919a9fb32 Mon Sep 17 00:00:00 2001
From: Mashhur <99575341+mashhurs@users.noreply.github.com>
Date: Tue, 11 Mar 2025 11:26:38 -0700
Subject: [PATCH 179/207] Update CHANGELOG.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: João Duarte
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f10750cd..bc00c0ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,5 @@
 ## 5.0.2
-  - Add elastic-transport client support [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223)
+  - Add elastic-transport client support used in elasticsearch-ruby 8.x [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223)
 
 ## 5.0.1
   - Fix: prevent plugin crash when hits contain illegal structure [#218](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/218)

From 5471b740b04daaa39a3cd14de90dc2be64a681ec Mon Sep 17 00:00:00 2001
From: Mashhur
Date: Sun, 16 Mar 2025 21:55:21 -0700
Subject: [PATCH 180/207] Fix the unit test failures caused by #223

---
 spec/inputs/elasticsearch_spec.rb | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index f64159ed..3fe067a5 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -92,9 +92,11 @@
     before do
       allow(Elasticsearch::Client).to receive(:new).and_return(es_client)
 
-      allow(es_client).to receive(:info).and_raise(
-        Elasticsearch::Transport::Transport::Errors::BadRequest.new
-      )
+      begin
+        allow(es_client).to receive(:info).and_raise(Elasticsearch::Transport::Transport::Errors::BadRequest.new)
+      rescue NameError # NameError: uninitialized constant Elasticsearch::Transport
+        allow(es_client).to receive(:info).and_raise(Elastic::Transport::Transport::Errors::BadRequest.new)
+      end
     end
 
     it "raises an exception" do
@@ -744,8 +746,13 @@ def synchronize_method!(object, method_name)
     it "should set host(s)" do
       plugin.register
       client = plugin.send(:client)
-
-      expect( client.transport.instance_variable_get(:@seeds) ).to eql [{
+      target_field = :@seeds
+      begin
+        Elasticsearch::Transport::Client
+      rescue
+        target_field = :@hosts
+      end
+      expect( client.transport.instance_variable_get(target_field) ).to eql [{
        :scheme => "https",
        :host => "ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io",
"ac31ebb90241773157043c34fd26fd46.us-central1.gcp.cloud.es.io", :port => 9243, From ac9660bb5b19924472ca6cb9153872d1cf34a184 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 17 Mar 2025 10:10:56 -0700 Subject: [PATCH 181/207] Check es-ruby client v8 availibility with a separate method in the spec. --- spec/inputs/elasticsearch_spec.rb | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 3fe067a5..5067400e 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -21,6 +21,13 @@ let(:es_version) { "7.5.0" } let(:cluster_info) { {"version" => {"number" => es_version, "build_flavor" => build_flavor}, "tagline" => "You Know, for Search"} } + def elastic_ruby_v8_client_available? + Elasticsearch::Transport + false + rescue NameError # NameError: uninitialized constant Elasticsearch::Transport if Elastic Ruby client is not available + true + end + before(:each) do Elasticsearch::Client.send(:define_method, :ping) { } # define no-action ping method allow_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(cluster_info) @@ -92,10 +99,10 @@ before do allow(Elasticsearch::Client).to receive(:new).and_return(es_client) - begin - allow(es_client).to receive(:info).and_raise(Elasticsearch::Transport::Transport::Errors::BadRequest.new) - rescue NameError # NameError: uninitialized constant Elasticsearch::Transport + if elastic_ruby_v8_client_available? allow(es_client).to receive(:info).and_raise(Elastic::Transport::Transport::Errors::BadRequest.new) + else + allow(es_client).to receive(:info).and_raise(Elasticsearch::Transport::Transport::Errors::BadRequest.new) end end From d9bf3754096505a8f9baf1566dd484c778794720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Duarte?= Date: Mon, 7 Apr 2025 12:58:43 +0100 Subject: [PATCH 182/207] Introduce cursor tracking akin to jdbc input (#205) Provide field value tracking, persisted to disk on each search_after page. Adds `:last_value` and `:present` placeholders, allowing the plugin to inject the cursor value and now-30 seconds, respectively, in the query string. Useful to track new data being written to an index or series of indices. Works best with nano second precision timestamps added by Elasticsearch's Ingest Pipelines. 
---------

Co-authored-by: Joel Andritsch
Co-authored-by: Rob Bavey
Co-authored-by: Karen Metts <35154725+karenzone@users.noreply.github.com>
---
 CHANGELOG.md                                  |   3 +
 docs/index.asciidoc                           | 179 +++++++++++++++++-
 lib/logstash/inputs/elasticsearch.rb          |  64 ++++++-
 .../inputs/elasticsearch/aggregation.rb       |  19 +-
 .../inputs/elasticsearch/cursor_tracker.rb    |  58 ++++++
 .../inputs/elasticsearch/paginated_search.rb  |  14 +-
 logstash-input-elasticsearch.gemspec          |   2 +-
 spec/inputs/cursor_tracker_spec.rb            |  72 +++++++
 spec/inputs/elasticsearch_spec.rb             |   6 +-
 spec/inputs/integration/elasticsearch_spec.rb |  10 +-
 10 files changed, 411 insertions(+), 16 deletions(-)
 create mode 100644 lib/logstash/inputs/elasticsearch/cursor_tracker.rb
 create mode 100644 spec/inputs/cursor_tracker_spec.rb

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc00c0ce..f4b4dc23 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 5.1.0
+  - Add "cursor"-like index tracking [#205](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/205)
+
 ## 5.0.2
   - Add elastic-transport client support used in elasticsearch-ruby 8.x [#223](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/223)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index f17c92e3..6478dff5 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -48,7 +48,7 @@ This would create an Elasticsearch query with the following format:
       "sort": [ "_doc" ]
     }'
 
-
+[id="plugins-{type}s-{plugin}-scheduling"]
 ==== Scheduling
 
 Input from this plugin can be scheduled to run periodically according to a specific
@@ -103,6 +103,133 @@ Common causes are:
 - When the hit result contains top-level fields that are {logstash-ref}/processing.html#reserved-fields[reserved in Logstash] but do not have the expected shape. Use the <> directive to avoid conflicts with the top-level namespace.
 - When <> is enabled and the docinfo fields cannot be merged into the hit result. Combine <> and <> to avoid conflict.
 
+[id="plugins-{type}s-{plugin}-cursor"]
+==== Tracking a field's value across runs
+
+.Technical Preview: Tracking a field's value
+****
+The feature that allows tracking a field's value across runs is in _Technical Preview_.
+Configuration options and implementation details are subject to change in minor releases without being preceded by deprecation warnings.
+****
+
+Some use cases require tracking the value of a particular field between two jobs.
+Examples include:
+
+* avoiding the need to re-process the entire result set of a long query after an unplanned restart
+* grabbing only new data from an index instead of processing the entire set on each job.
+
+The Elasticsearch input plugin provides the <> and <> options.
+When <> is set, the plugin records the value of that field for the last document retrieved in a run into
+a file.
+(The file location defaults to <>.)
+
+You can then inject this value in the query using the placeholder `:last_value`.
+The value will be injected into the query before execution, and then updated after the query completes if new data was found.
+
+This feature works best when:
+
+* the query sorts by the tracking field,
+* the timestamp field is added by {es}, and
+* the field type has enough resolution so that two events are unlikely to have the same value.
+
+Consider using a tracking field whose type is https://www.elastic.co/guide/en/elasticsearch/reference/current/date_nanos.html[date nanoseconds].
+If the tracking field is of this data type, you can use an extra placeholder called `:present` to inject the nanosecond-based value of "now-30s".
+This placeholder is useful as the right-hand side of a range filter, allowing the collection of
+new data but leaving partially-searchable bulk request data to the next scheduled job.
+
+[id="plugins-{type}s-{plugin}-tracking-sample"]
+===== Sample configuration: Track field value across runs
+
+This section contains a series of steps to help you set up the "tailing" of data being written to a set of indices, using a date nanosecond field added by an Elasticsearch ingest pipeline and the `tracking_field` capability of this plugin.
+
+. Create an ingest pipeline that adds Elasticsearch's `_ingest.timestamp` field to the documents as `event.ingested`:
++
+[source, json]
+    PUT _ingest/pipeline/my-pipeline
+    {
+      "processors": [
+        {
+          "script": {
+            "lang": "painless",
+            "source": "ctx.putIfAbsent(\"event\", [:]); ctx.event.ingested = metadata().now.format(DateTimeFormatter.ISO_INSTANT);"
+          }
+        }
+      ]
+    }
+
+[start=2]
+. Create an index mapping where the tracking field is of date nanosecond type and invokes the defined pipeline:
++
+[source, json]
+    PUT /_template/my_template
+    {
+      "index_patterns": ["test-*"],
+      "settings": {
+        "index.default_pipeline": "my-pipeline"
+      },
+      "mappings": {
+        "properties": {
+          "event": {
+            "properties": {
+              "ingested": {
+                "type": "date_nanos",
+                "format": "strict_date_optional_time_nanos"
+              }
+            }
+          }
+        }
+      }
+    }
+
+[start=3]
+. Define a query that looks at all data of the indices, sorted by the tracking field, and with a range filter since the last value seen until present:
++
+[source,json]
+{
+  "query": {
+    "range": {
+      "event.ingested": {
+        "gt": ":last_value",
+        "lt": ":present"
+      }
+    }
+  },
+  "sort": [
+    {
+      "event.ingested": {
+        "order": "asc",
+        "format": "strict_date_optional_time_nanos",
+        "numeric_type": "date_nanos"
+      }
+    }
+  ]
+}
+
+[start=4]
+. Configure the Elasticsearch input to query the indices with the query defined above, every minute, and track the `event.ingested` field:
++
+[source, ruby]
+    input {
+      elasticsearch {
+        id => tail_test_index
+        hosts => [ 'https://../']
+        api_key => '....'
+        index => 'test-*'
+        query => '{ "query": { "range": { "event.ingested": { "gt": ":last_value", "lt": ":present"}}}, "sort": [ { "event.ingested": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" } } ] }'
+        tracking_field => "[event][ingested]"
+        slices => 5 # optional use of slices to speed data processing, should be equal to or less than the number of primary shards
+        schedule => '* * * * *' # every minute
+        schedule_overlap => false # don't accumulate jobs if one takes longer than 1 minute
+      }
+    }
+
+With this sample setup, new documents are indexed into a `test-*` index.
+The next scheduled run:
+
+* selects all new documents since the last observed value of the tracking field,
+* uses {ref}/point-in-time-api.html#point-in-time-api[Point in time (PIT)] + {ref}/paginate-search-results.html#search-after[Search after] to paginate through all the data, and
+* updates the value of the field at the end of the pagination.
+
 [id="plugins-{type}s-{plugin}-options"]
 ==== Elasticsearch Input configuration options
 
@@ -126,12 +253,14 @@ Please check out <> for details.
 | <> |<>|No
 | <> |<>|No
 | <> |<>|No
+| <> |<>|No
 | <> |<>|No
 | <> |<>|No
 | <> |<>|No
 | <> |<>, one of `["hits","aggregations"]`|No
 | <> | <>|No
 | <> |<>|No
+| <> |<>|No
 | <> |<>|No
 | <> |<>, one of `["auto", "search_after", "scroll"]`|No
 | <> |<>|No
@@ -151,6 +280,8 @@ Please check out <> for details.
 | <> |<>, one of `["full", "none"]`|No
 | <> | <>|No
 | <> | {logstash-ref}/field-references-deepdive.html[field reference] | No
+| <> |<>|No
+| <> |<>|No
 | <> | <>|No
 | <> |<>|No
 |=======================================================================
 
@@ -330,6 +461,17 @@ Check out {ref}/api-conventions.html#api-multi-index[Multi Indices
 documentation] in the Elasticsearch documentation for info on referencing
 multiple indices.
 
+[id="plugins-{type}s-{plugin}-last_run_metadata_path"]
+===== `last_run_metadata_path`
+
+ * Value type is <>
+ * There is no default value for this setting.
+
+The path to store the last observed value of the tracking field, when used.
+By default this file is stored as `${path.data}/plugins/inputs/elasticsearch/<pipeline_id>/last_run_value`.
+
+This setting should point to a file, not a directory, and Logstash must have read+write access to this file.
+
 [id="plugins-{type}s-{plugin}-password"]
 ===== `password`
 
@@ -410,6 +552,19 @@ for example: "* * * * *" (execute query every minute, on the minute)
 There is no schedule by default. If no schedule is given, then the statement is run
 exactly once.
 
+[id="plugins-{type}s-{plugin}-schedule_overlap"]
+===== `schedule_overlap`
+
+ * Value type is <>
+ * Default value is `true`
+
+Whether to allow queuing of a scheduled run while a run is already occurring.
+Queuing is ideal for ensuring that a new run starts immediately after the previous one finishes when there
+is a lot of work to do, but because the queue is unbounded it may lead to an out-of-memory error over long
+periods of time if the queue grows continuously.
+
+When in doubt, set `schedule_overlap` to false (it may become the default value in the future).
+
 [id="plugins-{type}s-{plugin}-scroll"]
 ===== `scroll`
 
@@ -622,6 +777,28 @@ When the `target` is set to a field reference, the `_source` of the hit is place
 This option can be useful to avoid populating unknown fields when a downstream schema such as ECS is enforced.
 It is also possible to target an entry in the event's metadata, which will be available during event processing but not exported to your outputs (e.g., `target \=> "[@metadata][_source]"`).
 
+[id="plugins-{type}s-{plugin}-tracking_field"]
+===== `tracking_field`
+
+* Value type is <>
+* There is no default value for this setting.
+
+Which field from the last event of a previous run will be used as the cursor value for the following run.
+The value of this field is injected into each query if the query uses the placeholder `:last_value`.
+For the first query after a pipeline is started, the value used is either read from the <> file,
+or taken from the <> setting.
+
+Note: The tracking value is updated after each page is read and at the end of each Point in Time. In case of a crash, the last saved value will be used, so some duplication of data can occur. For this reason, the use of unique document IDs for each event is recommended in the downstream destination.
+
+[id="plugins-{type}s-{plugin}-tracking_field_seed"]
+===== `tracking_field_seed`
+
+* Value type is <>
+* Default value is `"1970-01-01T00:00:00.000000000Z"`
+
+The starting value for the <> if there is no <> already.
+This field defaults to the nanosecond-precision ISO8601 representation of `epoch`, or "1970-01-01T00:00:00.000000000Z", given nanosecond-precision timestamps are the
+most reliable data format to use for this feature.
 
 [id="plugins-{type}s-{plugin}-user"]
 ===== `user`
 
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index c9d8b552..564acc6a 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -73,6 +73,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
 
   require 'logstash/inputs/elasticsearch/paginated_search'
   require 'logstash/inputs/elasticsearch/aggregation'
+  require 'logstash/inputs/elasticsearch/cursor_tracker'
 
   include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
   include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck
@@ -124,6 +125,20 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # by this pipeline input.
   config :slices, :validate => :number
 
+  # Enable tracking the value of a given field to be used as a cursor
+  # Main concerns:
+  #   * using anything other than _event.timestamp easily leads to data loss
+  #   * the first "synchronization" run can take a long time
+  config :tracking_field, :validate => :string
+
+  # Define the initial seed value of the tracking_field
+  config :tracking_field_seed, :validate => :string, :default => "1970-01-01T00:00:00.000000000Z"
+
+  # The location where the tracking field value will be stored
+  # The value is persisted after each scheduled run (and not per result)
+  # If it's not set it defaults to '${path.data}/plugins/inputs/elasticsearch/<pipeline_id>/last_run_value'
+  config :last_run_metadata_path, :validate => :string
+
   # If set, include Elasticsearch document information such as index, type, and
   # the id in the event.
   #
@@ -250,6 +265,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # exactly once.
   config :schedule, :validate => :string
 
+  # Allow scheduled runs to overlap (enabled by default). Setting to false will
+ config :schedule_overlap, :validate => :boolean + # If set, the _source of each hit will be added nested under the target instead of at the top-level config :target, :validate => :field_reference @@ -328,16 +347,30 @@ def register setup_query_executor + setup_cursor_tracker + @client end def run(output_queue) if @schedule - scheduler.cron(@schedule) { @query_executor.do_run(output_queue) } + scheduler.cron(@schedule, :overlap => @schedule_overlap) do + @query_executor.do_run(output_queue, get_query_object()) + end scheduler.join else - @query_executor.do_run(output_queue) + @query_executor.do_run(output_queue, get_query_object()) + end + end + + def get_query_object + if @cursor_tracker + query = @cursor_tracker.inject_cursor(@query) + @logger.debug("new query is #{query}") + else + query = @query end + LogStash::Json.load(query) end ## @@ -347,6 +380,11 @@ def push_hit(hit, output_queue, root_field = '_source') event = event_from_hit(hit, root_field) decorate(event) output_queue << event + record_last_value(event) + end + + def record_last_value(event) + @cursor_tracker.record_last_value(event) if @tracking_field end def event_from_hit(hit, root_field) @@ -640,6 +678,28 @@ def setup_query_executor end end + def setup_cursor_tracker + return unless @tracking_field + return unless @query_executor.is_a?(LogStash::Inputs::Elasticsearch::SearchAfter) + + if @resolved_search_api != "search_after" || @response_type != "hits" + raise ConfigurationError.new("The `tracking_field` feature can only be used with `search_after` non-aggregation queries") + end + + @cursor_tracker = CursorTracker.new(last_run_metadata_path: last_run_metadata_path, + tracking_field: @tracking_field, + tracking_field_seed: @tracking_field_seed) + @query_executor.cursor_tracker = @cursor_tracker + end + + def last_run_metadata_path + return @last_run_metadata_path if @last_run_metadata_path + + last_run_metadata_path = ::File.join(LogStash::SETTINGS.get_value("path.data"), "plugins", "inputs", "elasticsearch", pipeline_id, "last_run_value") + FileUtils.mkdir_p ::File.dirname(last_run_metadata_path) + last_run_metadata_path + end + def get_transport_client_class # LS-core includes `elasticsearch` gem. The gem is composed of two separate gems: `elasticsearch-api` and `elasticsearch-transport` # And now `elasticsearch-transport` is old, instead we have `elastic-transport`. 
diff --git a/lib/logstash/inputs/elasticsearch/aggregation.rb b/lib/logstash/inputs/elasticsearch/aggregation.rb index e74a4357..91c12443 100644 --- a/lib/logstash/inputs/elasticsearch/aggregation.rb +++ b/lib/logstash/inputs/elasticsearch/aggregation.rb @@ -12,14 +12,9 @@ def initialize(client, plugin) @client = client @plugin_params = plugin.params + @index = @plugin_params["index"] @size = @plugin_params["size"] - @query = @plugin_params["query"] @retries = @plugin_params["retries"] - @agg_options = { - :index => @plugin_params["index"], - :size => 0 - }.merge(:body => @query) - @plugin = plugin end @@ -33,10 +28,18 @@ def retryable(job_name, &block) false end - def do_run(output_queue) + def aggregation_options(query_object) + { + :index => @index, + :size => 0, + :body => query_object + } + end + + def do_run(output_queue, query_object) logger.info("Aggregation starting") r = retryable(AGGREGATION_JOB) do - @client.search(@agg_options) + @client.search(aggregation_options(query_object)) end @plugin.push_hit(r, output_queue, 'aggregations') if r end diff --git a/lib/logstash/inputs/elasticsearch/cursor_tracker.rb b/lib/logstash/inputs/elasticsearch/cursor_tracker.rb new file mode 100644 index 00000000..d43b1fd8 --- /dev/null +++ b/lib/logstash/inputs/elasticsearch/cursor_tracker.rb @@ -0,0 +1,58 @@ +require 'fileutils' + +module LogStash; module Inputs; class Elasticsearch + class CursorTracker + include LogStash::Util::Loggable + + attr_reader :last_value + + def initialize(last_run_metadata_path:, tracking_field:, tracking_field_seed:) + @last_run_metadata_path = last_run_metadata_path + @last_value_hashmap = Java::java.util.concurrent.ConcurrentHashMap.new + @last_value = IO.read(@last_run_metadata_path) rescue nil || tracking_field_seed + @tracking_field = tracking_field + logger.info "Starting value for cursor field \"#{@tracking_field}\": #{@last_value}" + @mutex = Mutex.new + end + + def checkpoint_cursor(intermediate: true) + @mutex.synchronize do + if intermediate + # in intermediate checkpoints pick the smallest + converge_last_value {|v1, v2| v1 < v2 ? v1 : v2} + else + # in the last search of a PIT choose the largest + converge_last_value {|v1, v2| v1 > v2 ? v1 : v2} + @last_value_hashmap.clear + end + IO.write(@last_run_metadata_path, @last_value) + end + end + + def converge_last_value(&block) + return if @last_value_hashmap.empty? + new_last_value = @last_value_hashmap.reduceValues(1000, &block) + logger.debug? && logger.debug("converge_last_value: got #{@last_value_hashmap.values.inspect}. won: #{new_last_value}") + return if new_last_value == @last_value + @last_value = new_last_value + logger.info "New cursor value for field \"#{@tracking_field}\" is: #{new_last_value}" + end + + def record_last_value(event) + value = event.get(@tracking_field) + logger.trace? 
&& logger.trace("storing last_value if #{@tracking_field} for #{Thread.current.object_id}: #{value}") + @last_value_hashmap.put(Thread.current.object_id, value) + end + + def inject_cursor(query_json) + # ":present" means "now - 30s" to avoid grabbing partially visible data in the PIT + result = query_json.gsub(":last_value", @last_value.to_s).gsub(":present", now_minus_30s) + logger.debug("inject_cursor: injected values for ':last_value' and ':present'", :query => result) + result + end + + def now_minus_30s + Java::java.time.Instant.now.minusSeconds(30).to_s + end + end +end; end; end diff --git a/lib/logstash/inputs/elasticsearch/paginated_search.rb b/lib/logstash/inputs/elasticsearch/paginated_search.rb index 2e8236bc..dd66b2c0 100644 --- a/lib/logstash/inputs/elasticsearch/paginated_search.rb +++ b/lib/logstash/inputs/elasticsearch/paginated_search.rb @@ -21,9 +21,10 @@ def initialize(client, plugin) @pipeline_id = plugin.pipeline_id end - def do_run(output_queue) - return retryable_search(output_queue) if @slices.nil? || @slices <= 1 + def do_run(output_queue, query) + @query = query + return retryable_search(output_queue) if @slices.nil? || @slices <= 1 retryable_slice_search(output_queue) end @@ -122,6 +123,13 @@ class SearchAfter < PaginatedSearch PIT_JOB = "create point in time (PIT)" SEARCH_AFTER_JOB = "search_after paginated search" + attr_accessor :cursor_tracker + + def do_run(output_queue, query) + super(output_queue, query) + @cursor_tracker.checkpoint_cursor(intermediate: false) if @cursor_tracker + end + def pit?(id) !!id&.is_a?(String) end @@ -192,6 +200,8 @@ def search(output_queue:, slice_id: nil, pit_id:) end end + @cursor_tracker.checkpoint_cursor(intermediate: true) if @cursor_tracker + logger.info("Query completed", log_details) end diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 8e36201b..469f4d88 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '5.0.2' + s.version = '5.1.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" diff --git a/spec/inputs/cursor_tracker_spec.rb b/spec/inputs/cursor_tracker_spec.rb new file mode 100644 index 00000000..291d6c61 --- /dev/null +++ b/spec/inputs/cursor_tracker_spec.rb @@ -0,0 +1,72 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/devutils/rspec/shared_examples" +require "logstash/inputs/elasticsearch" +require "logstash/inputs/elasticsearch/cursor_tracker" + +describe LogStash::Inputs::Elasticsearch::CursorTracker do + + let(:last_run_metadata_path) { Tempfile.new('cursor_tracker_testing').path } + let(:tracking_field_seed) { "1980-01-01T23:59:59.999999999Z" } + let(:options) do + { + :last_run_metadata_path => last_run_metadata_path, + :tracking_field => "my_field", + :tracking_field_seed => tracking_field_seed + } + end + + subject { described_class.new(**options) } + + it "creating a class works" do + expect(subject).to be_a described_class + end + + describe "checkpoint_cursor" do + before(:each) do + subject.checkpoint_cursor(intermediate: false) # store seed value + [ + Thread.new(subject) {|subject| subject.record_last_value(LogStash::Event.new("my_field" => "2025-01-03T23:59:59.999999999Z")) }, + Thread.new(subject) {|subject| subject.record_last_value(LogStash::Event.new("my_field" => "2025-01-01T23:59:59.999999999Z")) }, + Thread.new(subject) {|subject| subject.record_last_value(LogStash::Event.new("my_field" => "2025-01-02T23:59:59.999999999Z")) }, + ].each(&:join) + end + context "when doing intermediate checkpoint" do + it "persists the smallest value" do + subject.checkpoint_cursor(intermediate: true) + expect(IO.read(last_run_metadata_path)).to eq("2025-01-01T23:59:59.999999999Z") + end + end + context "when doing non-intermediate checkpoint" do + it "persists the largest value" do + subject.checkpoint_cursor(intermediate: false) + expect(IO.read(last_run_metadata_path)).to eq("2025-01-03T23:59:59.999999999Z") + end + end + end + + describe "inject_cursor" do + let(:new_value) { "2025-01-03T23:59:59.999999999Z" } + let(:fake_now) { "2026-09-19T23:59:59.999999999Z" } + + let(:query) do + %q[ + { "query": { "range": { "event.ingested": { "gt": :last_value, "lt": :present}}}, "sort": [ { "event.ingested": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" } } ] } + ] + end + + before(:each) do + subject.record_last_value(LogStash::Event.new("my_field" => new_value)) + subject.checkpoint_cursor(intermediate: false) + allow(subject).to receive(:now_minus_30s).and_return(fake_now) + end + + it "injects the value of the cursor into json query if it contains :last_value" do + expect(subject.inject_cursor(query)).to match(/#{new_value}/) + end + + it "injects current time into json query if it contains :present" do + expect(subject.inject_cursor(query)).to match(/#{fake_now}/) + end + end +end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 5067400e..0c07992c 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1165,7 +1165,7 @@ def wait_receive_request context "when there's an exception" do before(:each) do - allow(client).to receive(:search).and_raise RuntimeError + allow(client).to receive(:search).and_raise RuntimeError.new("test exception") end it 'produces no events' do plugin.run queue @@ -1310,6 +1310,10 @@ def wait_receive_request let(:mock_queue) { double('queue', :<< => nil) } + before(:each) do + plugin.send(:setup_cursor_tracker) + end + it 'pushes a 
generated event to the queue' do plugin.send(:push_hit, hit, mock_queue) expect(mock_queue).to have_received(:<<) do |event| diff --git a/spec/inputs/integration/elasticsearch_spec.rb b/spec/inputs/integration/elasticsearch_spec.rb index 2e153495..a7c45bd5 100644 --- a/spec/inputs/integration/elasticsearch_spec.rb +++ b/spec/inputs/integration/elasticsearch_spec.rb @@ -76,6 +76,14 @@ shared_examples 'secured_elasticsearch' do it_behaves_like 'an elasticsearch index plugin' + let(:unauth_exception_class) do + begin + Elasticsearch::Transport::Transport::Errors::Unauthorized + rescue + Elastic::Transport::Transport::Errors::Unauthorized + end + end + context "incorrect auth credentials" do let(:config) do @@ -85,7 +93,7 @@ let(:queue) { [] } it "fails to run the plugin" do - expect { plugin.register }.to raise_error Elasticsearch::Transport::Transport::Errors::Unauthorized + expect { plugin.register }.to raise_error unauth_exception_class end end end From a307db2d77708bc5b14a78aed679a1e2c2742e10 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 8 Apr 2025 07:36:21 -0700 Subject: [PATCH 183/207] ES|QL support: ESQL executor implementation, response type to accept esql option, validations to make sure both LS and ES support the ESQL execution. --- CHANGELOG.md | 3 + lib/logstash/inputs/elasticsearch.rb | 54 ++++++++++- lib/logstash/inputs/elasticsearch/esql.rb | 79 ++++++++++++++++ logstash-input-elasticsearch.gemspec | 2 +- spec/inputs/elasticsearch_esql_spec.rb | 96 ++++++++++++++++++++ spec/inputs/elasticsearch_spec.rb | 106 ++++++++++++++++++++++ 6 files changed, 334 insertions(+), 6 deletions(-) create mode 100644 lib/logstash/inputs/elasticsearch/esql.rb create mode 100644 spec/inputs/elasticsearch_esql_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index f4b4dc23..d63c5df0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 5.2.0 + - ES|QL support [#233](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/233) + ## 5.1.0 - Add "cursor"-like index tracking [#205](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/205) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 564acc6a..736c25dc 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -74,6 +74,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base require 'logstash/inputs/elasticsearch/paginated_search' require 'logstash/inputs/elasticsearch/aggregation' require 'logstash/inputs/elasticsearch/cursor_tracker' + require 'logstash/inputs/elasticsearch/esql' include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1) include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck @@ -104,7 +105,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # This allows you to speccify the response type: either hits or aggregations # where hits: normal search request # aggregations: aggregation request - config :response_type, :validate => ['hits', 'aggregations'], :default => 'hits' + config :response_type, :validate => %w[hits aggregations esql], :default => 'hits' # This allows you to set the maximum number of hits returned per scroll. config :size, :validate => :number, :default => 1000 @@ -302,10 +303,17 @@ def register fill_hosts_from_cloud_id setup_ssl_params! 
-    @base_query = LogStash::Json.load(@query)
-    if @slices
-      @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option")
-      @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`")
+    if @response_type == 'esql'
+      validate_ls_version_for_esql_support!
+      validate_esql_query!
+      inform_ineffective_esql_params
+    else
+      # for ES|QL the plugin accepts a raw string query; for other response types it expects JSON
+      @base_query = LogStash::Json.load(@query)
+      if @slices
+        @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option")
+        @slices < 1 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `slices` option must be greater than zero, got `#{@slices}`")
+      end
     end
 
     @retries < 0 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `retries` option must be equal or greater than zero, got `#{@retries}`")
@@ -341,6 +349,9 @@ def register
 
     test_connection!
 
+    # make sure connected ES supports ES|QL (8.11+)
+    validate_es_for_esql_support! if @response_type == 'esql'
+
     setup_serverless
 
     setup_search_api
@@ -398,6 +409,12 @@ def event_from_hit(hit, root_field)
       return event_factory.new_event('event' => { 'original' => serialized_hit }, 'tags' => ['_elasticsearch_input_failure'])
     end
 
+    def decorate_and_push_to_queue(output_queue, mapped_entry)
+      event = targeted_event_factory.new_event mapped_entry
+      decorate(event)
+      output_queue << event
+    end
+
     def set_docinfo_fields(hit, event)
       # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event.
       docinfo_target = event.get(@docinfo_target) || {}
@@ -675,6 +692,8 @@ def setup_query_executor
         end
       when 'aggregations'
         LogStash::Inputs::Elasticsearch::Aggregation.new(@client, self)
+      when 'esql'
+        LogStash::Inputs::Elasticsearch::Esql.new(@client, self)
       end
     end
 
@@ -714,6 +733,31 @@ def get_transport_client_class
     ::Elastic::Transport::Transport::HTTP::Manticore
   end
 
+  def validate_ls_version_for_esql_support!
+    # LS 8.17.4+ has elasticsearch-ruby 8.17 client
+    # elasticsearch-ruby 8.11+ supports ES|QL
+    if Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create("8.17.4")
+      fail("Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least 8.17.4")
+    end
+  end
+
+  def validate_esql_query!
+    fail(LogStash::ConfigurationError, "`query` cannot be empty") if @query.strip.empty?
+    source_commands = %w[FROM ROW SHOW]
+    contains_source_command = source_commands.any? { |source_command| @query.strip.start_with?(source_command) }
+    fail(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") unless contains_source_command
+  end
+
+  def inform_ineffective_esql_params
+    ineffective_options = original_params.keys & %w(target size slices search_api)
+    @logger.info("Configured #{ineffective_options} params are ineffective in ES|QL mode") if ineffective_options.size > 1
+  end
+
+  def validate_es_for_esql_support!
+    es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create("8.11")
+    fail("Connected Elasticsearch #{es_version} version does not support ES|QL. Please upgrade it.") unless es_supports_esql
+  end
+
   module URIOrEmptyValidator
     ##
     # @override to provide :uri_or_empty validator
diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb
new file mode 100644
index 00000000..dbcaacf9
--- /dev/null
+++ b/lib/logstash/inputs/elasticsearch/esql.rb
@@ -0,0 +1,79 @@
+require 'logstash/helpers/loggable_try'
+
+module LogStash
+  module Inputs
+    class Elasticsearch
+      class Esql
+        include LogStash::Util::Loggable
+
+        ESQL_JOB = "ES|QL job"
+
+        # Initialize the ESQL query executor
+        # @param client [Elasticsearch::Client] The Elasticsearch client instance
+        # @param plugin [LogStash::Inputs::Elasticsearch] The parent plugin instance
+        def initialize(client, plugin)
+          @client = client
+          @plugin_params = plugin.params
+          @plugin = plugin
+          @retries = @plugin_params["retries"]
+          @query = @plugin_params["query"]
+        end
+
+        # Execute the ESQL query and process results
+        # @param output_queue [Queue] The queue to push processed events to
+        def do_run(output_queue)
+          logger.info("ES|QL executor starting")
+          response = retryable(ESQL_JOB) do
+            @client.esql.query({ body: { query: @query }, format: 'json' })
+          end
+          # retryable has already logged the error details
+          return if response == false
+
+          if response&.headers&.dig("warning")
+            logger.warn("ES|QL executor received warning", {:message => response.headers["warning"]})
+          end
+          if response['values'] && response['columns']
+            process_response(response['values'], response['columns'], output_queue)
+          end
+        end
+
+        # Execute a retryable operation with proper error handling
+        # @param job_name [String] Name of the job for logging purposes
+        # @yield The block to execute
+        # @return [Boolean] true if successful, false otherwise
+        def retryable(job_name, &block)
+          stud_try = ::LogStash::Helpers::LoggableTry.new(logger, job_name)
+          stud_try.try((@retries + 1).times) { yield }
+        rescue => e
+          error_details = {:message => e.message, :cause => e.cause}
+          error_details[:backtrace] = e.backtrace if logger.debug?
+ logger.error("#{job_name} failed with ", error_details) + false + end + + private + + # Process the ESQL response and push events to the output queue + # @param values [Array[Array]] The ESQL query response hits + # @param columns [Array[Hash]] The ESQL query response columns + # @param output_queue [Queue] The queue to push processed events to + def process_response(values, columns, output_queue) + values.each do |value| + mapped_data = map_column_and_values(columns, value) + @plugin.decorate_and_push_to_queue(output_queue, mapped_data) + end + end + + # Map column names to their corresponding values + # @param columns [Array] Array of column definitions + # @param values [Array] Array of values for the current row + # @return [Hash] Mapped data with column names as keys + def map_column_and_values(columns, values) + columns.each_with_index.with_object({}) do |(column, index), mapped_data| + mapped_data[column["name"]] = values[index] + end + end + end + end + end +end \ No newline at end of file diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 469f4d88..976b3ef5 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '5.1.0' + s.version = '5.2.0' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb new file mode 100644 index 00000000..8629fabd --- /dev/null +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -0,0 +1,96 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/inputs/elasticsearch" +require "elasticsearch" + +describe LogStash::Inputs::Elasticsearch::Esql do + let(:client) { instance_double(Elasticsearch::Client) } + let(:esql_client) { double("esql-client") } + let(:plugin) { instance_double(LogStash::Inputs::Elasticsearch, params: plugin_config) } + let(:plugin_config) do + { + "query" => "FROM test-index | STATS count() BY field", + "retries" => 3 + } + end + let(:esql_executor) { described_class.new(client, plugin) } + + describe "when initializes" do + it "sets up the ESQL client with correct parameters" do + expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) + expect(esql_executor.instance_variable_get(:@retries)).to eq(plugin_config["retries"]) + end + end + + describe "when faces error while retrying" do + it "retries the given block the specified number of times" do + attempts = 0 + result = esql_executor.retryable("Test Job") do + attempts += 1 + raise StandardError if attempts < 3 + "success" + end + expect(attempts).to eq(3) + expect(result).to eq("success") + end + + it "returns false if the block fails all attempts" do + result = esql_executor.retryable("Test Job") do + raise StandardError + end + expect(result).to eq(false) + end + end + + describe "when executing chain of processes" do + let(:output_queue) { Queue.new } + let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'id'}, { 'name' => 'val'}] } } + + before do + allow(esql_executor).to receive(:retryable).and_yield + allow(client).to receive_message_chain(:esql, :query).and_return(response) + 
allow(plugin).to receive(:decorate_and_push_to_queue) + end + + it "executes the ESQL query and processes the results" do + allow(response).to receive(:headers).and_return({}) + esql_executor.do_run(output_queue) + expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {'id' => 'foo', 'val' => 'bar'}) + end + + it "logs a warning if the response contains a warning header" do + allow(response).to receive(:headers).and_return({"warning" => "some warning"}) + expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", {:message => "some warning"}) + esql_executor.do_run(output_queue) + end + + it "does not log a warning if the response does not contain a warning header" do + allow(response).to receive(:headers).and_return({}) + expect(esql_executor.logger).not_to receive(:warn) + esql_executor.do_run(output_queue) + end + end + + + describe "when starts processing the response" do + let(:output_queue) { Queue.new } + let(:values) { [%w[foo bar]] } + let(:columns) { [{'name' => 'id'}, {'name' => 'val'}] } + + it "processes the ESQL response and pushes events to the output queue" do + allow(plugin).to receive(:decorate_and_push_to_queue) + esql_executor.send(:process_response, values, columns, output_queue) + expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {'id' => 'foo', 'val' => 'bar'}) + end + end + + describe "when maps column and values" do + let(:columns) { [{'name' => 'id'}, {'name' => 'val'}] } + let(:values) { %w[foo bar] } + + it "maps column names to their corresponding values" do + result = esql_executor.send(:map_column_and_values, columns, values) + expect(result).to eq({'id' => 'foo', 'val' => 'bar'}) + end + end +end \ No newline at end of file diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 0c07992c..d0a8123a 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1370,4 +1370,110 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie client.transport.respond_to?(:transport) ? client.transport.transport : client.transport end + context "#ESQL" do + let(:config) do + { + "query" => "FROM test-index | STATS count() BY field", + "response_type" => "esql", + "retries" => 3 + } + end + let(:es_version) { "8.11.0" } + + before(:each) do + # ES|QL supported |elasticsearch-ruby v8 client is available from 8.17.4 + # this is a safeguard to let tests succeed in <8.17.4 versions, see validation test cases for unsupported behavior + stub_const("LOGSTASH_VERSION", "8.17.4") + end + + describe "#initialize" do + it "sets up the ESQL client with correct parameters" do + expect(plugin.instance_variable_get(:@query)).to eq(config["query"]) + expect(plugin.instance_variable_get(:@response_type)).to eq(config["response_type"]) + expect(plugin.instance_variable_get(:@retries)).to eq(config["retries"]) + end + end + + describe "#register" do + before(:each) do + Elasticsearch::Client.send(:define_method, :ping) { } + allow_any_instance_of(Elasticsearch::Client).to receive(:info).and_return(cluster_info) + end + it "creates ES|QL executor" do + plugin.register + expect(plugin.instance_variable_get(:@query_executor)).to be_an_instance_of(LogStash::Inputs::Elasticsearch::Esql) + end + end + + describe "#validation" do + + describe "LS version" do + context "when compatible" do + + it "does not raise an error" do + expect { plugin.send(:validate_ls_version_for_esql_support!) 
}.not_to raise_error + end + end + + context "when incompatible" do + before(:each) do + stub_const("LOGSTASH_VERSION", "8.10.0") + end + + it "raises a runtime error" do + expect { plugin.send(:validate_ls_version_for_esql_support!) } + .to raise_error(RuntimeError, /Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least 8.17.4/) + end + end + end + + describe "ES version" do + before(:each) do + allow(plugin).to receive(:es_version).and_return("8.10.5") + end + + context "when incompatible" do + it "raises a runtime error" do + expect { plugin.send(:validate_es_for_esql_support!) } + .to raise_error(RuntimeError, /Connected Elasticsearch 8.10.5 version does not supports ES|QL. Please upgrade it./) + end + end + end + + describe "ES|QL query" do + context "when query is valid" do + it "does not raise an error" do + expect { plugin.send(:validate_esql_query!) }.not_to raise_error + end + end + + context "when query is empty" do + let(:config) do + { + "query" => " " + } + end + + it "raises a configuration error" do + expect { plugin.send(:validate_esql_query!) } + .to raise_error(LogStash::ConfigurationError, /`query` cannot be empty/) + end + end + + context "when query doesn't align with ES syntax" do + let(:config) do + { + "query" => "RANDOM query" + } + end + + it "raises a configuration error" do + source_commands = %w[FROM ROW SHOW] + expect { plugin.send(:validate_esql_query!) } + .to raise_error(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") + end + end + end + end + end end From 9c35f22a5f0c7d539bcc6de1b5f37f709f215ef9 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 8 Apr 2025 17:14:51 -0700 Subject: [PATCH 184/207] Merge with upstream, warn if query doesn't include METADATA which DSL adds by default - might be users are looking for by default. --- lib/logstash/inputs/elasticsearch.rb | 5 +++-- lib/logstash/inputs/elasticsearch/esql.rb | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 736c25dc..24a72c94 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -366,15 +366,16 @@ def register def run(output_queue) if @schedule scheduler.cron(@schedule, :overlap => @schedule_overlap) do - @query_executor.do_run(output_queue, get_query_object()) + @query_executor.do_run(output_queue, get_query_object) end scheduler.join else - @query_executor.do_run(output_queue, get_query_object()) + @query_executor.do_run(output_queue, get_query_object) end end def get_query_object + return @query if @response_type == 'esql' if @cursor_tracker query = @cursor_tracker.inject_cursor(@query) @logger.debug("new query is #{query}") diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index dbcaacf9..3cf50c10 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -17,14 +17,18 @@ def initialize(client, plugin) @plugin = plugin @retries = @plugin_params["retries"] @query = @plugin_params["query"] + unless @query.include?('METADATA') + logger.warn("The query doesn't have METADATA keyword. 
Including it makes _id and _version available in the documents", {:query => @query}) + end end # Execute the ESQL query and process results # @param output_queue [Queue] The queue to push processed events to - def do_run(output_queue) + # @param query A query to be executed + def do_run(output_queue, query) logger.info("ES|QL executor starting") response = retryable(ESQL_JOB) do - @client.esql.query({ body: { query: @query }, format: 'json' }) + @client.esql.query({ body: { query: query }, format: 'json' }) end # retriable already printed error details return if response == false From 6f99055b23fb092324f4acd5cc124d2f8b47c700 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Wed, 9 Apr 2025 23:57:55 -0700 Subject: [PATCH 185/207] Run unit tests with the LS version which actually supports the ES|QL. --- spec/inputs/elasticsearch_esql_spec.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb index 8629fabd..411840e9 100644 --- a/spec/inputs/elasticsearch_esql_spec.rb +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -54,20 +54,20 @@ it "executes the ESQL query and processes the results" do allow(response).to receive(:headers).and_return({}) - esql_executor.do_run(output_queue) + esql_executor.do_run(output_queue, plugin_config["query"]) expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {'id' => 'foo', 'val' => 'bar'}) end it "logs a warning if the response contains a warning header" do allow(response).to receive(:headers).and_return({"warning" => "some warning"}) expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", {:message => "some warning"}) - esql_executor.do_run(output_queue) + esql_executor.do_run(output_queue, plugin_config["query"]) end it "does not log a warning if the response does not contain a warning header" do allow(response).to receive(:headers).and_return({}) expect(esql_executor.logger).not_to receive(:warn) - esql_executor.do_run(output_queue) + esql_executor.do_run(output_queue, plugin_config["query"]) end end @@ -93,4 +93,4 @@ expect(result).to eq({'id' => 'foo', 'val' => 'bar'}) end end -end \ No newline at end of file +end if LOGSTASH_VERSION >= '8.17.5' \ No newline at end of file From 086a5921246caad321cc628d5019178fb3ca6b3b Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 10 Apr 2025 11:18:16 -0700 Subject: [PATCH 186/207] Add query type to the agent. DRY of supported ES/LS versions. --- lib/logstash/inputs/elasticsearch.rb | 23 ++++++++++++----------- spec/inputs/elasticsearch_esql_spec.rb | 2 +- spec/inputs/elasticsearch_spec.rb | 11 +++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 24a72c94..e052e534 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -287,6 +287,9 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base DEFAULT_EAV_HEADER = { "Elastic-Api-Version" => "2023-10-31" }.freeze INTERNAL_ORIGIN_HEADER = { 'x-elastic-product-origin' => 'logstash-input-elasticsearch'}.freeze + LS_ESQL_SUPPORT_VERSION = "8.17.4" # the version started using elasticsearch-ruby v8 + ES_ESQL_SUPPORT_VERSION = "8.11.0" + def initialize(params={}) super(params) @@ -308,7 +311,6 @@ def register validate_esql_query! 
inform_ineffective_esql_params else - # for the ES|QL, plugin accepts raw string query but JSON for others @base_query = LogStash::Json.load(@query) if @slices @base_query.include?('slice') && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `query` option cannot specify specific `slice` when configured to manage parallel slices with `slices` option") @@ -349,8 +351,7 @@ def register test_connection! - # make sure connected ES supports ES|QL (8.11+) - validate_es_for_esql_support! if @response_type == 'esql' + validate_es_for_esql_support! setup_serverless @@ -571,8 +572,8 @@ def prepare_user_agent jvm_version = java.lang.System.getProperty('java.version') plugin_version = Gem.loaded_specs["logstash-input-elasticsearch"].version - # example: logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0 - "logstash/#{LOGSTASH_VERSION} (OS=#{os_name}-#{os_version}-#{os_arch}; JVM=#{jvm_vendor}-#{jvm_version}) logstash-#{@plugin_type}-#{config_name}/#{plugin_version}" + # example: logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0 query_type/DSL + "logstash/#{LOGSTASH_VERSION} (OS=#{os_name}-#{os_version}-#{os_arch}; JVM=#{jvm_vendor}-#{jvm_version}) logstash-#{@plugin_type}-#{config_name}/#{plugin_version} query_type/#{@response_type}" end def fill_user_password_from_cloud_auth @@ -735,10 +736,8 @@ def get_transport_client_class end def validate_ls_version_for_esql_support! - # LS 8.17.4+ has elasticsearch-ruby 8.17 client - # elasticsearch-ruby 8.11+ supports ES|QL - if Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create("8.17.4") - fail("Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least 8.17.4") + if Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LS_ESQL_SUPPORT_VERSION) + fail("Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{LS_ESQL_SUPPORT_VERSION}") end end @@ -755,8 +754,10 @@ def inform_ineffective_esql_params end def validate_es_for_esql_support! - es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create("8.11") - fail("Connected Elasticsearch #{es_version} version does not supports ES|QL. Please upgrade it.") unless es_supports_esql + return unless @response_type == 'esql' + # make sure connected ES supports ES|QL (8.11+) + es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) + fail("Connected Elasticsearch #{es_version} version does not supports ES|QL. 
ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql end module URIOrEmptyValidator diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb index 411840e9..2625544e 100644 --- a/spec/inputs/elasticsearch_esql_spec.rb +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -93,4 +93,4 @@ expect(result).to eq({'id' => 'foo', 'val' => 'bar'}) end end -end if LOGSTASH_VERSION >= '8.17.5' \ No newline at end of file +end if LOGSTASH_VERSION >= LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION \ No newline at end of file diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index d0a8123a..104972e5 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1378,12 +1378,11 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie "retries" => 3 } end - let(:es_version) { "8.11.0" } + let(:es_version) { LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION } + let(:ls_version) { LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION } before(:each) do - # ES|QL supported |elasticsearch-ruby v8 client is available from 8.17.4 - # this is a safeguard to let tests succeed in <8.17.4 versions, see validation test cases for unsupported behavior - stub_const("LOGSTASH_VERSION", "8.17.4") + stub_const("LOGSTASH_VERSION", ls_version) end describe "#initialize" do @@ -1422,7 +1421,7 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie it "raises a runtime error" do expect { plugin.send(:validate_ls_version_for_esql_support!) } - .to raise_error(RuntimeError, /Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least 8.17.4/) + .to raise_error(RuntimeError, /Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{ls_version}/) end end end @@ -1435,7 +1434,7 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie context "when incompatible" do it "raises a runtime error" do expect { plugin.send(:validate_es_for_esql_support!) } - .to raise_error(RuntimeError, /Connected Elasticsearch 8.10.5 version does not supports ES|QL. Please upgrade it./) + .to raise_error(RuntimeError, /Connected Elasticsearch 8.10.5 version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{es_version} version./) end end end From e30e0f9d58004019dbb39a28724a350bb7ccc4e8 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 10 Apr 2025 14:15:23 -0700 Subject: [PATCH 187/207] Remove query type from user-agent since it is useless, put back accidental method formatting. 
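
An illustrative before/after of the prepared user-agent string (example values only; the OS/JVM portions vary by environment):

  # with the query_type suffix added by the previous patch:
  #   logstash/8.17.4 (OS=...; JVM=...) logstash-input-elasticsearch/5.2.0 query_type/esql
  # after this patch (back to the original shape):
  #   logstash/8.17.4 (OS=...; JVM=...) logstash-input-elasticsearch/5.2.0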
--- lib/logstash/inputs/elasticsearch.rb | 8 ++++---- lib/logstash/inputs/elasticsearch/esql.rb | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index e052e534..4b007bc5 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -367,11 +367,11 @@ def register def run(output_queue) if @schedule scheduler.cron(@schedule, :overlap => @schedule_overlap) do - @query_executor.do_run(output_queue, get_query_object) + @query_executor.do_run(output_queue, get_query_object()) end scheduler.join else - @query_executor.do_run(output_queue, get_query_object) + @query_executor.do_run(output_queue, get_query_object()) end end @@ -572,8 +572,8 @@ def prepare_user_agent jvm_version = java.lang.System.getProperty('java.version') plugin_version = Gem.loaded_specs["logstash-input-elasticsearch"].version - # example: logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0 query_type/DSL - "logstash/#{LOGSTASH_VERSION} (OS=#{os_name}-#{os_version}-#{os_arch}; JVM=#{jvm_vendor}-#{jvm_version}) logstash-#{@plugin_type}-#{config_name}/#{plugin_version} query_type/#{@response_type}" + # example: logstash/7.14.1 (OS=Linux-5.4.0-84-generic-amd64; JVM=AdoptOpenJDK-11.0.11) logstash-input-elasticsearch/4.10.0 + "logstash/#{LOGSTASH_VERSION} (OS=#{os_name}-#{os_version}-#{os_arch}; JVM=#{jvm_vendor}-#{jvm_version}) logstash-#{@plugin_type}-#{config_name}/#{plugin_version}" end def fill_user_password_from_cloud_auth diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index 3cf50c10..bf7ff4c8 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -27,6 +27,7 @@ def initialize(client, plugin) # @param query A query to be executed def do_run(output_queue, query) logger.info("ES|QL executor starting") + puts "Query: #{query}" response = retryable(ESQL_JOB) do @client.esql.query({ body: { query: query }, format: 'json' }) end From 7746c14c80ca4d16bada74d9dca9a33747c29aea Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 10 Apr 2025 16:29:24 -0700 Subject: [PATCH 188/207] Initial docs added for ES|QL. --- docs/index.asciidoc | 71 ++++++++++++++++++++++- lib/logstash/inputs/elasticsearch/esql.rb | 1 - 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 6478dff5..722e78ed 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -230,6 +230,40 @@ The next scheduled run: * uses {ref}/point-in-time-api.html#point-in-time-api[Point in time (PIT)] + {ref}/paginate-search-results.html#search-after[Search after] to paginate through all the data, and * updates the value of the field at the end of the pagination. +[id="plugins-{type}s-{plugin}-esql"] +==== ES|QL support +{es} Query Language (ES|QL) provides a SQL-like interface for querying your {es} data. + +To utilize the ES|QL feature with this plugin, the following version requirements must be met: +[cols="1,2",options="header"] +|=== +|Component |Minimum version +|{es} |8.11.0 or newer +|{ls} |8.17.4 or newer +|This plugin |4.23.0+ (4.x series) or 5.2.0+ (5.x series) +|=== + +To configure ES|QL query in the plugin, set the `response_type` to `esql` and provide your ES|QL query in the `query` parameter. 
+ +IMPORTANT: We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments. + +The following is a basic scheduled ES|QL query that runs hourly: +[source, ruby] + input { + elasticsearch { + id => hourly_cron_job + hosts => [ 'https://../'] + api_key => '....' + response_type => 'esql' + query => 'FROM my-index | WHERE @timestamp > NOW() - 1 hour | LIMIT 500' + schedule => '0 * * * *' # every hour at min 0 + } + } + +NOTE: With ES|QL query, {ls} doesn't generate `event.original` + +For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[official {es} documentation]. + [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input configuration options @@ -257,7 +291,7 @@ Please check out <> for details. | <> |<>|No | <> |<>|No | <> |<>|No -| <> |<>, one of `["hits","aggregations"]`|No +| <> |<>, one of `["hits","aggregations","esql"]`|No | <> | <>|No | <> |<>|No | <> |<>|No @@ -507,17 +541,50 @@ the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the qu [id="plugins-{type}s-{plugin}-response_type"] ===== `response_type` - * Value can be any of: `hits`, `aggregations` + * Value can be any of: `hits`, `aggregations`, `esql` * Default value is `hits` Which part of the result to transform into Logstash events when processing the response from the query. + The default `hits` will generate one event per returned document (i.e. "hit"). + When set to `aggregations`, a single Logstash event will be generated with the contents of the `aggregations` object of the query's response. In this case the `hits` object will be ignored. The parameter `size` will be always be set to 0 regardless of the default or user-defined value set in this plugin. +When using the `esql` setting, the query parameter must be a valid plaintext ES|QL string. +When this setting is active, `target`, `size`, `slices` and `search_api` parameters are ignored. +ES|QL returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries). +The plugin maps each value entry to an event, populating corresponding fields. 
+For example, a query might produce a table like:
+
+[cols="2,1,1,2",options="header"]
+|===
+|`timestamp` |`user_id` | `action` | `status_code`
+
+|2025-04-10T12:00:00 |123 |login |200
+|2025-04-10T12:05:00 |456 |purchase |403
+|===
+
+For this case, the plugin emits two events look like
+[source, json]
+[
+  {
+    "timestamp": "2025-04-10T12:00:00",
+    "user_id": 123,
+    "action": "login",
+    "status_code": 200
+  },
+  {
+    "timestamp": "2025-04-10T12:05:00",
+    "user_id": 456,
+    "action": "purchase",
+    "status_code": 403
+  }
+]
+
 [id="plugins-{type}s-{plugin}-request_timeout_seconds"]
 ===== `request_timeout_seconds`
 
diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb
index bf7ff4c8..3cf50c10 100644
--- a/lib/logstash/inputs/elasticsearch/esql.rb
+++ b/lib/logstash/inputs/elasticsearch/esql.rb
@@ -27,7 +27,6 @@ def initialize(client, plugin)
   # @param query A query to be executed
   def do_run(output_queue, query)
     logger.info("ES|QL executor starting")
-    puts "Query: #{query}"
     response = retryable(ESQL_JOB) do
       @client.esql.query({ body: { query: query }, format: 'json' })
     end

From 76303d815aed2d72a14d7f2d642c3786d655d287 Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Thu, 10 Apr 2025 17:08:46 -0700
Subject: [PATCH 189/207] Update query to include condition with string.

---
 docs/index.asciidoc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 722e78ed..589dc563 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -255,7 +255,10 @@ The following is a basic scheduled ES|QL query that runs hourly:
      hosts => [ 'https://../']
      api_key => '....'
      response_type => 'esql'
-     query => 'FROM my-index | WHERE @timestamp > NOW() - 1 hour | LIMIT 500'
+     query => '
+       FROM food-index
+       | WHERE spicy_level = "hot" AND @timestamp > NOW() - 1 hour
+       | LIMIT 500'
      schedule => '0 * * * *' # every hour at min 0
    }
  }

From 1fb29f7e112f5bf533411dc7e3d4e3203a80fc69 Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Fri, 11 Apr 2025 22:21:44 -0700
Subject: [PATCH 190/207] Tested escaped chars cases, uses original query.

---
 docs/index.asciidoc                       |  5 ++++-
 lib/logstash/inputs/elasticsearch/esql.rb | 10 +++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 589dc563..a0bfa1e4 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -258,11 +258,14 @@ The following is a basic scheduled ES|QL query that runs hourly:
       query => '
         FROM food-index
         | WHERE spicy_level = "hot" AND @timestamp > NOW() - 1 hour
-        | LIMIT 500'
+        | LIMIT 500
+      '
       schedule => '0 * * * *' # every hour at min 0
     }
   }
 
+Set `config.support_escapes: true` in `logstash.yml` if you need to escape special chars in the query.
+
 NOTE: With ES|QL query, {ls} doesn't generate `event.original`
 
 For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[official {es} documentation]. 
diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index 3cf50c10..5cdce780 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -13,10 +13,10 @@ class Esql # @param plugin [LogStash::Inputs::Elasticsearch] The parent plugin instance def initialize(client, plugin) @client = client - @plugin_params = plugin.params @plugin = plugin - @retries = @plugin_params["retries"] - @query = @plugin_params["query"] + @retries = plugin.params["retries"] + + @query = plugin.params["query"] unless @query.include?('METADATA') logger.warn("The query doesn't have METADATA keyword. Including it makes _id and _version available in the documents", {:query => @query}) end @@ -24,11 +24,11 @@ def initialize(client, plugin) # Execute the ESQL query and process results # @param output_queue [Queue] The queue to push processed events to - # @param query A query to be executed + # @param query A query (to obey interface definition) def do_run(output_queue, query) logger.info("ES|QL executor starting") response = retryable(ESQL_JOB) do - @client.esql.query({ body: { query: query }, format: 'json' }) + @client.esql.query({ body: { query: @query }, format: 'json' }) end # retriable already printed error details return if response == false From 5d47f2f60a07971d4c1a5bfad6721a0f54c920f6 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 14 Apr 2025 15:05:43 -0700 Subject: [PATCH 191/207] Integration tests added. --- .../integration/elasticsearch_esql_spec.rb | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 spec/inputs/integration/elasticsearch_esql_spec.rb diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb new file mode 100644 index 00000000..2c27bd8f --- /dev/null +++ b/spec/inputs/integration/elasticsearch_esql_spec.rb @@ -0,0 +1,148 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/inputs/elasticsearch" +require "elasticsearch" +require_relative "../../../spec/es_helper" + +describe LogStash::Inputs::Elasticsearch, integration: true do + + SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' + ES_HOSTS = ["http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}"] + + let(:plugin) { described_class.new(config) } + let(:es_version) { LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION } + let(:es_index) { "logstash-esql-integration-#{rand(1000)}" } + let(:test_documents) do + [ + { "message" => "test message 1", "type" => "a", "count" => 1 }, + { "message" => "test message 2", "type" => "a", "count" => 2 }, + { "message" => "test message 3", "type" => "b", "count" => 3 }, + { "message" => "test message 4", "type" => "b", "count" => 4 }, + { "message" => "test message 5", "type" => "c", "count" => 5 } + ] + end + let(:config) do + { + "hosts" => ES_HOSTS, + "response_type" => "esql" + } + end + let(:es_client) do + Elasticsearch::Client.new(hosts: ES_HOSTS) + end + + before(:all) do + # Skip tests if ES version doesn't support ES||QL + es_client = Elasticsearch::Client.new(hosts: ES_HOSTS) # need to separately create since let isn't allowed in before(:context) + es_version_info = es_client.info["version"] + es_gem_version = Gem::Version.create(es_version_info["number"]) + skip "Elasticsearch version does not support ES|QL" if es_gem_version.nil? 
|| es_gem_version < Gem::Version.create(LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION) + end + + before(:each) do + # Create index with test documents + es_client.indices.create(index: es_index, body: {}) unless es_client.indices.exists?(index: es_index) + + test_documents.each do |doc| + es_client.index(index: es_index, body: doc, refresh: true) + end + end + + after(:each) do + es_client.indices.delete(index: es_index) if es_client.indices.exists?(index: es_index) + end + + context "#run ES|QL queries" do + + before do + stub_const("LOGSTASH_VERSION", LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION) + allow_any_instance_of(LogStash::Inputs::Elasticsearch).to receive(:exit_plugin?).and_return false, true + end + + before(:each) do + plugin.register + end + + shared_examples "ESQL query execution" do |expected_count| + it "correctly retrieves documents" do + queue = Queue.new + plugin.run(queue) + + event_count = 0 + expected_count.times do |i| + event = queue.pop + expect(event).to be_a(LogStash::Event) + event_count += 1 + end + expect(event_count).to eq(expected_count) + end + end + + context "#FROM query" do + let(:config) do + super().merge("query" => "FROM #{es_index} | SORT count") + end + + include_examples "ESQL query execution", 5 + end + + context "#FROM query and WHERE clause" do + let(:config) do + super().merge("query" => "FROM #{es_index} | WHERE type == \"a\" | SORT count") + end + + include_examples "ESQL query execution", 2 + end + + context "#STATS aggregation" do + let(:config) do + super().merge("query" => "FROM #{es_index} | STATS avg(count) BY type") + end + + it "retrieves aggregated stats" do + queue = Queue.new + plugin.run(queue) + results = [] + 3.times do + event = queue.pop + expect(event).to be_a(LogStash::Event) + results << event.get("avg(count)") + end + + expected_averages = [1.5, 3.5, 5.0] + expect(results.sort).to eq(expected_averages) + end + end + + context "#METADATA included" do + let(:config) do + super().merge("query" => "FROM #{es_index} METADATA _index, _id, _version | SORT count") + end + + it "includes document metadata" do + queue = Queue.new + plugin.run(queue) + + 5.times do + event = queue.pop + expect(event).to be_a(LogStash::Event) + expect(event.get("_index")).not_to be_nil + expect(event.get("_id")).not_to be_nil + expect(event.get("_version")).not_to be_nil + end + end + end + + context "#invalid ES|QL query" do + let(:config) do + super().merge("query" => "FROM undefined index | LIMIT 1") + end + + it "doesn't produce events" do + queue = Queue.new + plugin.run(queue) + expect(queue.empty?).to eq(true) + end + end + end +end \ No newline at end of file From c291e24e7dacfceb9285949f87598f0574b13795 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 14 Apr 2025 15:42:13 -0700 Subject: [PATCH 192/207] Skip the ESQL test if LS with the ES client which doesn't support ESQL feature. --- spec/inputs/integration/elasticsearch_esql_spec.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb index 2c27bd8f..36d4e48f 100644 --- a/spec/inputs/integration/elasticsearch_esql_spec.rb +++ b/spec/inputs/integration/elasticsearch_esql_spec.rb @@ -10,7 +10,6 @@ ES_HOSTS = ["http#{SECURE_INTEGRATION ? 
's' : nil}://#{ESHelper.get_host_port}"]
 
   let(:plugin) { described_class.new(config) }
-  let(:es_version) { LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION }
   let(:es_index) { "logstash-esql-integration-#{rand(1000)}" }
   let(:test_documents) do
     [
@@ -32,11 +31,14 @@
   end
 
   before(:all) do
+    is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION)
+    skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client
+
     # Skip tests if ES version doesn't support ES||QL
     es_client = Elasticsearch::Client.new(hosts: ES_HOSTS) # need to separately create since let isn't allowed in before(:context)
     es_version_info = es_client.info["version"]
     es_gem_version = Gem::Version.create(es_version_info["number"])
-    skip "Elasticsearch version does not support ES|QL" if es_gem_version.nil? || es_gem_version < Gem::Version.create(LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION)
+    skip "ES version does not support ES|QL" if es_gem_version.nil? || es_gem_version < Gem::Version.create(LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION)
   end

From 22e72e9a41d0f05d62827ffa0dd8573c25cc171f Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Mon, 14 Apr 2025 16:53:40 -0700
Subject: [PATCH 193/207] Add comments on response type and query params about ES|QL acceptance/info and add docinfo* fields in ineffective fields list.

---
 lib/logstash/inputs/elasticsearch.rb | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index 4b007bc5..3cff066e 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -97,14 +97,17 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   # The index or alias to search.
   config :index, :validate => :string, :default => "logstash-*"
 
-  # The query to be executed. Read the Elasticsearch query DSL documentation
-  # for more info
-  # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
+  # The query to be executed. DSL or ES|QL (when `response_type => 'esql'`) query type is accepted.
+  # Read the following documentations for more info
+  # Query DSL: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
+  # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html
   config :query, :validate => :string, :default => '{ "sort": [ "_doc" ] }'
 
-  # This allows you to speccify the response type: either hits or aggregations
-  # where hits: normal search request
-  # aggregations: aggregation request
+  # This allows you to specify the response type: one of [hits, aggregations, esql]
+  # where
+  #  hits: normal search request
+  #  aggregations: aggregation request
+  #  esql: ES|QL request
   config :response_type, :validate => %w[hits aggregations esql], :default => 'hits'
 
   # This allows you to set the maximum number of hits returned per scroll.
@@ -749,7 +752,7 @@ def validate_esql_query!
   end
 
   def inform_ineffective_esql_params
-    ineffective_options = original_params.keys & %w(target size slices search_api)
+    ineffective_options = original_params.keys & %w(index target size slices search_api, docinfo, docinfo_target, docinfo_fields)
     @logger.info("Configured #{ineffective_options} params are ineffective in ES|QL mode") if ineffective_options.size > 1
   end

From af6e24aa50bfde0e196d733a3c2c378650499eeb Mon Sep 17 00:00:00 2001
From: Mashhur <99575341+mashhurs@users.noreply.github.com>
Date: Mon, 21 Apr 2025 14:20:43 -0700
Subject: [PATCH 194/207] Update spec/inputs/integration/elasticsearch_esql_spec.rb

Fix the condition to correctly compare the supported LS version.

---
 spec/inputs/integration/elasticsearch_esql_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb
index 36d4e48f..3c9ff7a4 100644
--- a/spec/inputs/integration/elasticsearch_esql_spec.rb
+++ b/spec/inputs/integration/elasticsearch_esql_spec.rb
@@ -31,7 +31,7 @@
   end
 
   before(:all) do
-    is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LogStash::Inputs::Elasticsearch::ES_ESQL_SUPPORT_VERSION)
+    is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION)
     skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client

From 4ce6fa4a113c6fcb62247bcfebd625a80caf0b46 Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Mon, 21 Apr 2025 14:29:06 -0700
Subject: [PATCH 195/207] Integration test skip condition correction.

---
 spec/inputs/integration/elasticsearch_esql_spec.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb
index 3c9ff7a4..819fac4c 100644
--- a/spec/inputs/integration/elasticsearch_esql_spec.rb
+++ b/spec/inputs/integration/elasticsearch_esql_spec.rb
@@ -31,7 +31,7 @@
   end
 
   before(:all) do
-    is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION)
+    is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION)
     skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client

From 4ed69ff9ddf33174d45f66e0137ac94b2f6c6f99 Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Thu, 24 Apr 2025 16:26:59 -0700
Subject: [PATCH 196/207] Introduce query_params option to accept drop_null_columns, set default timestamp converter to LogStash::Timestamp, dotted fields extended to nested fields. 
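
A minimal sketch of the dotted-to-nested key mapping this patch introduces (illustrative only; it mirrors the new map_column_and_values/nest_keys helpers but is not the plugin code itself):

  # columns/values as returned by the ES|QL API
  columns = [{ "name" => "status.code", "type" => "integer" }]
  values  = [[200]]

  # map column names to the row's values, then nest the dotted keys
  mapped = columns.each_with_index.with_object({}) do |(column, i), acc|
    acc[column["name"]] = values.first[i]
  end
  nested = mapped.each_with_object({}) do |(key, value), result|
    *path, leaf = key.split('.')
    path.inject(result) { |scope, part| scope[part] ||= {} }[leaf] = value
  end
  # nested => { "status" => { "code" => 200 } }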
--- docs/index.asciidoc | 34 ++++++++++++++++++++++- lib/logstash/inputs/elasticsearch.rb | 16 +++++++++++ lib/logstash/inputs/elasticsearch/esql.rb | 31 +++++++++++++++++++-- spec/inputs/elasticsearch_esql_spec.rb | 9 +++--- spec/inputs/elasticsearch_spec.rb | 32 +++++++++++++++++++++ 5 files changed, 113 insertions(+), 9 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index a0bfa1e4..bdf86c9c 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -266,7 +266,13 @@ The following is a basic scheduled ES|QL query that runs hourly: Set `config.support_escapes: true` in `logstash.yml` if you need to escape special chars in the query. -NOTE: With ES|QL query, {ls} doesn't generate `event.original` +NOTE: With ES|QL query, {ls} doesn't generate `event.original`. + +Consider the following caveat scenarios: + +- ES|QL by default returns entire columns even if their values are `null`. The plugin provides a `drop_null_columns` option via <>. Enabling this parameter instructs {es} to automatically exclude columns with null values from query results. +- If your {es} index uses https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] mapping(s), ES|QL query fetches all parent and sub-fields fields. Since {ls} events cannot contain parent field's concrete value and sub-field values together, we recommend using the `DROP` keyword in your ES|QL query explicitly remove sub-fields. +- If your {es} index contains top level `tags` field, this will conflict with {ls} event's reserved `tags` field. {ls} moves `tags` field values to the `_tags` and populates `tags` with `["_tagsparsefailure"]`. For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[official {es} documentation]. @@ -297,6 +303,7 @@ Please check out <> for details. | <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> |<>, one of `["hits","aggregations","esql"]`|No | <> | <>|No | <> |<>|No @@ -544,6 +551,31 @@ documentation] for more information. When <> resolves to `search_after` and the query does not specify `sort`, the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter to know more. +[id="plugins-{type}s-{plugin}-query_params"] +===== `query_params` +Parameters to send to {es} together with <>. 
+ +Accepted options: +[cols="2,1,3",options="header"] +|=== +|Option name |Default value | Description + +|`drop_null_columns` |`false` | Requests {es} to filter out `null` columns +|=== + +Example +[source, ruby] + input { + elasticsearch { + response_type => 'esql' + query => 'FROM access-logs* | WHERE type="apache"' + query_params => { + drop_null_columns => true + } + } + } + + [id="plugins-{type}s-{plugin}-response_type"] ===== `response_type` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 3cff066e..c5ef8184 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -276,6 +276,10 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # If set, the _source of each hit will be added nested under the target instead of at the top-level config :target, :validate => :field_reference + # Parameters query or query APIs can use + # current acceptable params: drop_null_columns => true|false (for ES|QL) + config :query_params, :validate => :hash, :default => {} + # Obsolete Settings config :ssl, :obsolete => "Set 'ssl_enabled' instead." config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." @@ -323,6 +327,7 @@ def register @retries < 0 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `retries` option must be equal or greater than zero, got `#{@retries}`") + validate_query_params! validate_authentication fill_user_password_from_cloud_auth @@ -751,6 +756,17 @@ def validate_esql_query! fail(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") unless contains_source_command end + def validate_query_params! + # keep the original, remove ES|QL accepted params and validate + cloned_query_params = @query_params.clone + if @response_type == 'esql' + cloned_query_params.delete("drop_null_columns") + fail(LogStash::ConfigurationError, "#{cloned_query_params} not accepted when `response_type => 'esql'`") if cloned_query_params.any? + else + fail(LogStash::ConfigurationError, "#{@query_params} not accepted when `response_type => #{@response_type}`") if @query_params.any? + end + end + def inform_ineffective_esql_params ineffective_options = original_params.keys & %w(index target size slices search_api, docinfo, docinfo_target, docinfo_fields) @logger.info("Configured #{ineffective_options} params are ineffective in ES|QL mode") if ineffective_options.size > 1 diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index 5cdce780..948cbb0d 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -8,6 +8,10 @@ class Esql ESQL_JOB = "ES|QL job" + ESQL_PARSERS_BY_TYPE = Hash.new(lambda { |x| x }).merge( + 'date' => ->(value) { value && LogStash::Timestamp.new(value) }, + ) + # Initialize the ESQL query executor # @param client [Elasticsearch::Client] The Elasticsearch client instance # @param plugin [LogStash::Inputs::Elasticsearch] The parent plugin instance @@ -20,6 +24,9 @@ def initialize(client, plugin) unless @query.include?('METADATA') logger.warn("The query doesn't have METADATA keyword. 
Including it makes _id and _version available in the documents", {:query => @query}) end + + params = plugin.params["query_params"] || {} + @drop_null_columns = params["drop_null_columns"] || false end # Execute the ESQL query and process results @@ -28,7 +35,7 @@ def initialize(client, plugin) def do_run(output_queue, query) logger.info("ES|QL executor starting") response = retryable(ESQL_JOB) do - @client.esql.query({ body: { query: @query }, format: 'json' }) + @client.esql.query({ body: { query: @query }, format: 'json', drop_null_columns: @drop_null_columns }) end # retriable already printed error details return if response == false @@ -64,7 +71,13 @@ def retryable(job_name, &block) def process_response(values, columns, output_queue) values.each do |value| mapped_data = map_column_and_values(columns, value) - @plugin.decorate_and_push_to_queue(output_queue, mapped_data) + nest_structured_data = nest_keys(mapped_data) + @plugin.decorate_and_push_to_queue(output_queue, nest_structured_data) + rescue => e + # if event creation fails with whatever reason, inform user and tag with failure and return entry as it is + logger.warn("Event creation error, ", message: e.message, exception: e.class, data: { "columns" => columns, "values" => [value] }) + failed_event = LogStash::Event.new("columns" => columns, "values" => [value], "tags" => ['_elasticsearch_input_failure']) + output_queue << failed_event end end @@ -74,7 +87,19 @@ def process_response(values, columns, output_queue) # @return [Hash] Mapped data with column names as keys def map_column_and_values(columns, values) columns.each_with_index.with_object({}) do |(column, index), mapped_data| - mapped_data[column["name"]] = values[index] + mapped_data[column["name"]] = ESQL_PARSERS_BY_TYPE[column["type"]].call(values[index]) + end + end + + # Transforms dotted keys to nested JSON shape + # @param dot_keyed_hash [Hash] whose keys are dotted (example 'a.b.c.d': 'val') + # @return [Hash] whose keys are nested with value mapped ({'a':{'b':{'c':{'d':'val'}}}}) + def nest_keys(dot_keyed_hash) + dot_keyed_hash.each_with_object({}) do |(key, value), result| + key_parts = key.to_s.split('.') + *path, leaf = key_parts + leaf_scope = path.inject(result) { |scope, part| scope[part] ||= {} } + leaf_scope[leaf] = value end end end diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb index 2625544e..3980a8dc 100644 --- a/spec/inputs/elasticsearch_esql_spec.rb +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -44,7 +44,7 @@ describe "when executing chain of processes" do let(:output_queue) { Queue.new } - let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'id'}, { 'name' => 'val'}] } } + let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'a.b.1.d'}, { 'name' => 'h_g.k$l.m.0'}] } } before do allow(esql_executor).to receive(:retryable).and_yield @@ -55,7 +55,7 @@ it "executes the ESQL query and processes the results" do allow(response).to receive(:headers).and_return({}) esql_executor.do_run(output_queue, plugin_config["query"]) - expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {'id' => 'foo', 'val' => 'bar'}) + expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {"a"=>{"b"=>{"1"=>{"d"=>"foo"}}}, "h_g"=>{"k$l"=>{"m"=>{"0"=>"bar"}}}}) end it "logs a warning if the response contains a warning header" do @@ -71,16 +71,15 @@ end end - describe "when starts processing the response" do let(:output_queue) { Queue.new } 
     let(:values) { [%w[foo bar]] }
-    let(:columns) { [{'name' => 'id'}, {'name' => 'val'}] }
+    let(:columns) { [{'name' => 'some.id'}, {'name' => 'some.val'}] }
 
     it "processes the ESQL response and pushes events to the output queue" do
       allow(plugin).to receive(:decorate_and_push_to_queue)
       esql_executor.send(:process_response, values, columns, output_queue)
-      expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {'id' => 'foo', 'val' => 'bar'})
+      expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {"some"=>{"id"=>"foo", "val"=>"bar"}})
     end
   end
 
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 104972e5..b7c97e00 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -1473,6 +1473,38 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie
         end
       end
     end
+
+      describe "with extra params" do
+        context "empty `query_params`" do
+          let(:config) {
+            super().merge('query_params' => {})
+          }
+
+          it "does not raise a configuration error" do
+            expect { plugin.send(:validate_query_params!) }.not_to raise_error
+          end
+        end
+
+        context "with actual `drop_null_columns` value" do
+          let(:config) {
+            super().merge('query_params' => { 'drop_null_columns' => true })
+          }
+
+          it "does not raise a configuration error" do
+            expect { plugin.send(:validate_query_params!) }.not_to raise_error
+          end
+        end
+
+        context "with extra non ES|QL params" do
+          let(:config) {
+            super().merge('query_params' => { 'drop_null_columns' => true, 'test' => 'hi'})
+          }
+
+          it "raises a configuration error" do
+            expect { plugin.send(:validate_query_params!) }.to raise_error(LogStash::ConfigurationError, "{\"test\"=>\"hi\"} not accepted when `response_type => 'esql'`")
+          end
+        end
+      end
     end
   end
 end

From 0725f98a10d849714e342436359a3e57522b6f49 Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Thu, 24 Apr 2025 21:57:44 -0700
Subject: [PATCH 197/207] Fix the failed integration test.

---
 spec/inputs/integration/elasticsearch_esql_spec.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb
index 819fac4c..b27b8dd5 100644
--- a/spec/inputs/integration/elasticsearch_esql_spec.rb
+++ b/spec/inputs/integration/elasticsearch_esql_spec.rb
@@ -116,9 +116,9 @@
   end
 
-  context "#METADATA included" do
+  context "#METADATA" do
     let(:config) do
-      super().merge("query" => "FROM #{es_index} METADATA _index, _id, _version | SORT count")
+      super().merge("query" => "FROM #{es_index} METADATA _index, _id, _version | DROP message.keyword, type.keyword | SORT count")
     end
 
     it "includes document metadata" do

From cfb36f37b5bf17185cfca3a7cac9daad3a Mon Sep 17 00:00:00 2001
From: Mashhur 
Date: Wed, 30 Apr 2025 18:53:12 -0700
Subject: [PATCH 198/207] Request dropping null columns and filter out null values. Consider setting the result into target if defined. Debug logs added which can help to investigate query and its result. 
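
A rough sketch of the per-row null filtering this patch aims for (illustrative only, not the plugin code): even when drop_null_columns is requested, a column kept for one row can still hold nil in another, so each row is filtered again while the event is built:

  columns = ["id", "optional_note"]
  rows    = [["foo", nil], ["bar", "kept"]]

  events = rows.map do |row|
    columns.zip(row).reject { |_, value| value.nil? }.to_h
  end
  # events => [{"id"=>"foo"}, {"id"=>"bar", "optional_note"=>"kept"}]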
---
 docs/index.asciidoc                       | 125 +++++++++--------
 lib/logstash/inputs/elasticsearch.rb      |  60 +++-----
 lib/logstash/inputs/elasticsearch/esql.rb |  80 ++++++-----
 spec/inputs/elasticsearch_esql_spec.rb    | 163 +++++++++++++++-------
 spec/inputs/elasticsearch_spec.rb         |  34 +----
 5 files changed, 238 insertions(+), 224 deletions(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index bdf86c9c..4b57ad0e 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -268,13 +268,67 @@ Set `config.support_escapes: true` in `logstash.yml` if you need to escape speci
 
 NOTE: With ES|QL query, {ls} doesn't generate `event.original`.
 
-Consider the following caveat scenarios:
+[id="plugins-{type}s-{plugin}-esql-event-mapping"]
+===== Mapping ES|QL result to {ls} event
+ES|QL returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries).
+The plugin maps each value entry to an event, populating corresponding fields.
+For example, a query might produce a table like:
+
+[cols="2,1,1,1,2",options="header"]
+|===
+|`timestamp` |`user_id` | `action` | `status.code` | `status.desc`
+
+|2025-04-10T12:00:00 |123 |login |200 | Success
+|2025-04-10T12:05:00 |456 |purchase |403 | Forbidden (unauthorized user)
+|===
+
+For this case, the plugin emits two events that look like
+[source, json]
+[
+  {
+    "timestamp": "2025-04-10T12:00:00",
+    "user_id": 123,
+    "action": "login",
+    "status": {
+      "code": 200,
+      "desc": "Success"
+    }
+  },
+  {
+    "timestamp": "2025-04-10T12:05:00",
+    "user_id": 456,
+    "action": "purchase",
+    "status": {
+      "code": 403,
+      "desc": "Forbidden (unauthorized user)"
+    }
+  }
+]

-- ES|QL by default returns entire columns even if their values are `null`. The plugin provides a `drop_null_columns` option via <>. Enabling this parameter instructs {es} to automatically exclude columns with null values from query results.
-- If your {es} index uses https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] mapping(s), ES|QL query fetches all parent and sub-fields fields. Since {ls} events cannot contain parent field's concrete value and sub-field values together, we recommend using the `DROP` keyword in your ES|QL query explicitly remove sub-fields.
-- If your {es} index contains top level `tags` field, this will conflict with {ls} event's reserved `tags` field. {ls} moves `tags` field values to the `_tags` and populates `tags` with `["_tagsparsefailure"]`.
+NOTE: If your index has a mapping with sub-objects where `status.code` and `status.desc` are actually dotted fields, they appear in {ls} events as a nested structure.

-For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[official {es} documentation].
+[id="plugins-{type}s-{plugin}-esql-multifields"]
+===== Conflict on multi-fields
+
+An ES|QL query fetches all parent and sub-fields if your {es} index has https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] or https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/subobjects[subobjects].
+Since {ls} events cannot contain a parent field's concrete value and its sub-field values together, the plugin cannot map the result to a {ls} event and produces a failed event tagged `_elasticsearch_input_failure`.
+We recommend using the `RENAME` (or `DROP`) keyword in your ES|QL query to explicitly rename the fields and overcome this issue. 
+To illustrate the situation with an example, assume your mapping has a `time` field with `time.min` and `time.max` sub-fields, as follows:
+[source, ruby]
+    "properties": {
+      "time": { "type": "long" },
+      "time.min": { "type": "long" },
+      "time.max": { "type": "long" }
+    }
+
+The ES|QL result will contain all three fields but the plugin cannot map them into a {ls} event.
+To avoid this, you can use the `RENAME` keyword to rename the `time` parent field so that all three fields have unique names.
+[source, ruby]
+    ...
+    query => 'FROM my-index | RENAME time AS time.current'
+    ...
+
+For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{es} ES|QL documentation].
 
 [id="plugins-{type}s-{plugin}-options"]
 ==== Elasticsearch Input configuration options
@@ -303,7 +357,6 @@ Please check out <> for details.
 | <> |<>|No
 | <> |<>|No
 | <> |<>|No
-| <> |<>|No
 | <> |<>, one of `["hits","aggregations","esql"]`|No
 | <> | <>|No
 | <> |<>|No
@@ -545,37 +598,13 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`.
  * Value type is <>
  * Default value is `'{ "sort": [ "_doc" ] }'`
 
-The query to be executed. Read the {ref}/query-dsl.html[Elasticsearch query DSL
-documentation] for more information.
+The query to be executed.
+Accepted query shape is DSL or ES|QL (when `response_type => 'esql'`).
+Read the {ref}/query-dsl.html[{es} query DSL documentation] or {ref}/esql.html[{es} ES|QL documentation] for more information.
 
 When <> resolves to `search_after` and the query does not specify `sort`,
 the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter to know more.
 
-[id="plugins-{type}s-{plugin}-query_params"]
-===== `query_params`
-Parameters to send to {es} together with <>. 
-
-Accepted options:
-[cols="2,1,3",options="header"]
-|===
-|Option name |Default value | Description
-
-|`drop_null_columns` |`false` | Requests {es} to filter out `null` columns
-|===
-
-Example
-[source, ruby]
-  input {
-    elasticsearch {
-      response_type => 'esql'
-      query => 'FROM access-logs* | WHERE type="apache"'
-      query_params => {
-        drop_null_columns => true
-      }
-    }
-  }
-
 [id="plugins-{type}s-{plugin}-response_type"]
 ===== `response_type`
 
@@ -592,36 +621,8 @@ contents of the `aggregations` object of the query's response. In this case the
 `hits` object will be ignored. The parameter `size` will be always be set to 0
 regardless of the default or user-defined value set in this plugin.
 
-When using the `esql` setting, the query parameter must be a valid plaintext ES|QL string.
-When this setting is active, `target`, `size`, `slices` and `search_api` parameters are ignored.
-ES|QL returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries).
-The plugin maps each value entry to an event, populating corresponding fields. 
-For example, a query might produce a table like: - -[cols="2,1,1,2",options="header"] -|=== -|`timestamp` |`user_id` | `action` | `status_code` - -|2025-04-10T12:00:00 |123 |login |200 -|2025-04-10T12:05:00 |456 |purchase |403 -|=== - -For this case, the plugin emits two events look like -[source, json] -[ - { - "timestamp": "2025-04-10T12:00:00", - "user_id": 123, - "action": "login", - "status_code": 200 - }, - { - "timestamp": "2025-04-10T12:05:00", - "user_id": 456, - "action": "purchase", - "status_code": 403 - } -] [id="plugins-{type}s-{plugin}-request_timeout_seconds"] ===== `request_timeout_seconds` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index c5ef8184..c5f596eb 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -97,7 +97,7 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # The index or alias to search. config :index, :validate => :string, :default => "logstash-*" - # The query to be executed. DSL or ES|QL (when `response_type => 'esql'`) query type is accepted. + # The query to be executed. DSL or ES|QL (when `response_type => 'esql'`) query shape is accepted. # Read the following documentations for more info # Query DSL: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html @@ -276,10 +276,6 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # If set, the _source of each hit will be added nested under the target instead of at the top-level config :target, :validate => :field_reference - # Parameters query or query APIs can use - # current acceptable params: drop_null_columns => true|false (for ES|QL) - config :query_params, :validate => :hash, :default => {} - # Obsolete Settings config :ssl, :obsolete => "Set 'ssl_enabled' instead." config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." @@ -316,7 +312,8 @@ def register if @response_type == 'esql' validate_ls_version_for_esql_support! validate_esql_query! - inform_ineffective_esql_params + ignored_options = original_params.keys & %w(index size slices search_api, docinfo, docinfo_target, docinfo_fields) + @logger.info("Configured #{ignored_options} params are ignored in ES|QL query") if ignored_options&.size > 1 else @base_query = LogStash::Json.load(@query) if @slices @@ -327,7 +324,6 @@ def register @retries < 0 && fail(LogStash::ConfigurationError, "Elasticsearch Input Plugin's `retries` option must be equal or greater than zero, got `#{@retries}`") - validate_query_params! 
validate_authentication fill_user_password_from_cloud_auth @@ -383,6 +379,22 @@ def run(output_queue) end end + ## + # This can be called externally from the query_executor + public + def push_hit(hit, output_queue, root_field = '_source') + event = event_from_hit(hit, root_field) + decorate(event) + output_queue << event + record_last_value(event) + end + + def decorate_event(event) + decorate(event) + end + + private + def get_query_object return @query if @response_type == 'esql' if @cursor_tracker @@ -394,16 +406,6 @@ def get_query_object LogStash::Json.load(query) end - ## - # This can be called externally from the query_executor - public - def push_hit(hit, output_queue, root_field = '_source') - event = event_from_hit(hit, root_field) - decorate(event) - output_queue << event - record_last_value(event) - end - def record_last_value(event) @cursor_tracker.record_last_value(event) if @tracking_field end @@ -419,12 +421,6 @@ def event_from_hit(hit, root_field) return event_factory.new_event('event' => { 'original' => serialized_hit }, 'tags' => ['_elasticsearch_input_failure']) end - def decorate_and_push_to_queue(output_queue, mapped_entry) - event = targeted_event_factory.new_event mapped_entry - decorate(event) - output_queue << event - end - def set_docinfo_fields(hit, event) # do not assume event[@docinfo_target] to be in-place updatable. first get it, update it, then at the end set it in the event. docinfo_target = event.get(@docinfo_target) || {} @@ -441,8 +437,6 @@ def set_docinfo_fields(hit, event) event.set(@docinfo_target, docinfo_target) end - private - def hosts_default?(hosts) hosts.nil? || ( hosts.is_a?(Array) && hosts.empty? ) end @@ -756,22 +750,6 @@ def validate_esql_query! fail(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") unless contains_source_command end - def validate_query_params! - # keep the original, remove ES|QL accepted params and validate - cloned_query_params = @query_params.clone - if @response_type == 'esql' - cloned_query_params.delete("drop_null_columns") - fail(LogStash::ConfigurationError, "#{cloned_query_params} not accepted when `response_type => 'esql'`") if cloned_query_params.any? - else - fail(LogStash::ConfigurationError, "#{@query_params} not accepted when `response_type => #{@response_type}`") if @query_params.any? - end - end - - def inform_ineffective_esql_params - ineffective_options = original_params.keys & %w(index target size slices search_api, docinfo, docinfo_target, docinfo_fields) - @logger.info("Configured #{ineffective_options} params are ineffective in ES|QL mode") if ineffective_options.size > 1 - end - def validate_es_for_esql_support! return unless @response_type == 'esql' # make sure connected ES supports ES|QL (8.11+) diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index 948cbb0d..a6b9ed2c 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -18,24 +18,23 @@ class Esql def initialize(client, plugin) @client = client @plugin = plugin + @target_field = plugin.params["target"] @retries = plugin.params["retries"] @query = plugin.params["query"] unless @query.include?('METADATA') - logger.warn("The query doesn't have METADATA keyword. Including it makes _id and _version available in the documents", {:query => @query}) + logger.info("`METADATA` not found the query. 
`_id`, `_version` and `_index` will not be available in the result", {:query => @query}) end - - params = plugin.params["query_params"] || {} - @drop_null_columns = params["drop_null_columns"] || false + logger.debug("ES|QL executor initialized with", {:query => @query}) end # Execute the ESQL query and process results # @param output_queue [Queue] The queue to push processed events to # @param query A query (to obey interface definition) def do_run(output_queue, query) - logger.info("ES|QL executor starting") + logger.info("ES|QL executor has started") response = retryable(ESQL_JOB) do - @client.esql.query({ body: { query: @query }, format: 'json', drop_null_columns: @drop_null_columns }) + @client.esql.query({ body: { query: @query }, format: 'json', drop_null_columns: true }) end # retriable already printed error details return if response == false @@ -43,9 +42,11 @@ def do_run(output_queue, query) if response&.headers&.dig("warning") logger.warn("ES|QL executor received warning", {:message => response.headers["warning"]}) end - if response['values'] && response['columns'] - process_response(response['values'], response['columns'], output_queue) - end + columns = response['columns']&.freeze + values = response['values']&.freeze + logger.debug("ES|QL query response size: #{values&.size}") + + process_response(columns, values, output_queue) if columns && values end # Execute a retryable operation with proper error handling @@ -65,42 +66,51 @@ def retryable(job_name, &block) private # Process the ESQL response and push events to the output queue - # @param values [Array[Array]] The ESQL query response hits # @param columns [Array[Hash]] The ESQL query response columns + # @param values [Array[Array]] The ESQL query response hits # @param output_queue [Queue] The queue to push processed events to - def process_response(values, columns, output_queue) - values.each do |value| - mapped_data = map_column_and_values(columns, value) - nest_structured_data = nest_keys(mapped_data) - @plugin.decorate_and_push_to_queue(output_queue, nest_structured_data) + def process_response(columns, values, output_queue) + column_specs = columns.map { |column| ColumnSpec.new(column) } + values.each do |row| + event = column_specs.zip(row).each_with_object(LogStash::Event.new) do |(column, value), event| + # `unless value.nil?` is a part of `drop_null_columns` that if some of columns' values are not `nil`, `nil` values appear + # we should continuously filter out them to achieve full `drop_null_columns` on each individual row (ideal `LIMIT 1` result) + unless value.nil? + field_reference = @target_field.nil? ? 
column.field_reference : "[#{@target_field}][#{column.field_reference}]" + event.set(field_reference, ESQL_PARSERS_BY_TYPE[column.type].call(value)) + end + end + @plugin.decorate_event(event) + output_queue << event rescue => e # if event creation fails with whatever reason, inform user and tag with failure and return entry as it is - logger.warn("Event creation error, ", message: e.message, exception: e.class, data: { "columns" => columns, "values" => [value] }) - failed_event = LogStash::Event.new("columns" => columns, "values" => [value], "tags" => ['_elasticsearch_input_failure']) + logger.warn("Event creation error, ", message: e.message, exception: e.class, data: { "columns" => columns, "values" => [row] }) + failed_event = LogStash::Event.new("columns" => columns, "values" => [row], "tags" => ['_elasticsearch_input_failure']) output_queue << failed_event end end + end + + # Class representing a column specification in the ESQL response['columns'] + # The class's main purpose is to provide a structure for the event key + # columns is an array with `name` and `type` pair (example: `{"name"=>"@timestamp", "type"=>"date"}`) + # @attr_reader :name [String] The name of the column + # @attr_reader :type [String] The type of the column + class ColumnSpec + attr_reader :name, :type + + def initialize(spec) + @name = isolate(spec.fetch('https://github.com/name')) + @type = isolate(spec.fetch('https://github.com/type')) + end - # Map column names to their corresponding values - # @param columns [Array] Array of column definitions - # @param values [Array] Array of values for the current row - # @return [Hash] Mapped data with column names as keys - def map_column_and_values(columns, values) - columns.each_with_index.with_object({}) do |(column, index), mapped_data| - mapped_data[column["name"]] = ESQL_PARSERS_BY_TYPE[column["type"]].call(values[index]) - end + def field_reference + @_field_reference ||= '[' + name.gsub('.', '][') + ']' end - # Transforms dotted keys to nested JSON shape - # @param dot_keyed_hash [Hash] whose keys are dotted (example 'a.b.c.d': 'val') - # @return [Hash] whose keys are nested with value mapped ({'a':{'b':{'c':{'d':'val'}}}}) - def nest_keys(dot_keyed_hash) - dot_keyed_hash.each_with_object({}) do |(key, value), result| - key_parts = key.to_s.split('.') - *path, leaf = key_parts - leaf_scope = path.inject(result) { |scope, part| scope[part] ||= {} } - leaf_scope[leaf] = value - end + private + def isolate(value) + value.frozen? ? 
value : value.clone.freeze end end end diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb index 3980a8dc..1d6a12c5 100644 --- a/spec/inputs/elasticsearch_esql_spec.rb +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -15,81 +15,138 @@ end let(:esql_executor) { described_class.new(client, plugin) } - describe "when initializes" do + describe "#initialization" do it "sets up the ESQL client with correct parameters" do expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) expect(esql_executor.instance_variable_get(:@retries)).to eq(plugin_config["retries"]) + expect(esql_executor.instance_variable_get(:@plugin)).to eq(plugin) + expect(esql_executor.instance_variable_get(:@target_field)).to eq(nil) end end - describe "when faces error while retrying" do - it "retries the given block the specified number of times" do - attempts = 0 - result = esql_executor.retryable("Test Job") do - attempts += 1 - raise StandardError if attempts < 3 - "success" + describe "#execution" do + let(:output_queue) { Queue.new } + + context "when faces error while retrying" do + it "retries the given block the specified number of times" do + attempts = 0 + result = esql_executor.retryable("Test Job") do + attempts += 1 + raise StandardError if attempts < 3 + "success" + end + expect(attempts).to eq(3) + expect(result).to eq("success") end - expect(attempts).to eq(3) - expect(result).to eq("success") - end - it "returns false if the block fails all attempts" do - result = esql_executor.retryable("Test Job") do - raise StandardError + it "returns false if the block fails all attempts" do + result = esql_executor.retryable("Test Job") do + raise StandardError + end + expect(result).to eq(false) end - expect(result).to eq(false) end - end - describe "when executing chain of processes" do - let(:output_queue) { Queue.new } - let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'a.b.1.d'}, { 'name' => 'h_g.k$l.m.0'}] } } + context "when executing chain of processes" do + let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'a.b.1.d', 'type' => 'keyword' }, + { 'name' => 'h_g.k$l.m.0', 'type' => 'keyword' }] } } - before do - allow(esql_executor).to receive(:retryable).and_yield - allow(client).to receive_message_chain(:esql, :query).and_return(response) - allow(plugin).to receive(:decorate_and_push_to_queue) - end + before do + allow(esql_executor).to receive(:retryable).and_yield + allow(client).to receive_message_chain(:esql, :query).and_return(response) + allow(plugin).to receive(:decorate_event) + end - it "executes the ESQL query and processes the results" do - allow(response).to receive(:headers).and_return({}) - esql_executor.do_run(output_queue, plugin_config["query"]) - expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {"a"=>{"b"=>{"1"=>{"d"=>"foo"}}}, "h_g"=>{"k$l"=>{"m"=>{"0"=>"bar"}}}}) - end + it "executes the ESQL query and processes the results" do + allow(response).to receive(:headers).and_return({}) + esql_executor.do_run(output_queue, plugin_config["query"]) + expect(output_queue.size).to eq(1) - it "logs a warning if the response contains a warning header" do - allow(response).to receive(:headers).and_return({"warning" => "some warning"}) - expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", {:message => "some warning"}) - esql_executor.do_run(output_queue, plugin_config["query"]) - end + event = output_queue.pop + 
expect(event.get('[a][b][1][d]')).to eq('foo') + expect(event.get('[h_g][k$l][m][0]')).to eq('bar') + end + + it "logs a warning if the response contains a warning header" do + allow(response).to receive(:headers).and_return({ "warning" => "some warning" }) + expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", { :message => "some warning" }) + esql_executor.do_run(output_queue, plugin_config["query"]) + end - it "does not log a warning if the response does not contain a warning header" do - allow(response).to receive(:headers).and_return({}) - expect(esql_executor.logger).not_to receive(:warn) - esql_executor.do_run(output_queue, plugin_config["query"]) + it "does not log a warning if the response does not contain a warning header" do + allow(response).to receive(:headers).and_return({}) + expect(esql_executor.logger).not_to receive(:warn) + esql_executor.do_run(output_queue, plugin_config["query"]) + end end - end - describe "when starts processing the response" do - let(:output_queue) { Queue.new } - let(:values) { [%w[foo bar]] } - let(:columns) { [{'name' => 'some.id'}, {'name' => 'some.val'}] } + describe "multiple rows in the result" do + let(:response) { { 'values' => rows, 'columns' => [{ 'name' => 'key.1', 'type' => 'keyword' }, + { 'name' => 'key.2', 'type' => 'keyword' }] } } + + before do + allow(esql_executor).to receive(:retryable).and_yield + allow(client).to receive_message_chain(:esql, :query).and_return(response) + allow(plugin).to receive(:decorate_event) + allow(response).to receive(:headers).and_return({}) + end + + context "when mapping" do + let(:rows) { [%w[foo bar], %w[hello world]] } + + it "1:1 maps rows to events" do + esql_executor.do_run(output_queue, plugin_config["query"]) + expect(output_queue.size).to eq(2) + + event_1 = output_queue.pop + expect(event_1.get('[key][1]')).to eq('foo') + expect(event_1.get('[key][2]')).to eq('bar') + + event_2 = output_queue.pop + expect(event_2.get('[key][1]')).to eq('hello') + expect(event_2.get('[key][2]')).to eq('world') + end + end + + context "when partial nil values appear" do + let(:rows) { [[nil, "bar"], ["hello", nil]] } + + it "ignores the nil values" do + esql_executor.do_run(output_queue, plugin_config["query"]) + expect(output_queue.size).to eq(2) - it "processes the ESQL response and pushes events to the output queue" do - allow(plugin).to receive(:decorate_and_push_to_queue) - esql_executor.send(:process_response, values, columns, output_queue) - expect(plugin).to have_received(:decorate_and_push_to_queue).with(output_queue, {"some"=>{"id"=>"foo", "val"=>"bar"}}) + event_1 = output_queue.pop + expect(event_1.get('[key][1]')).to eq(nil) + expect(event_1.get('[key][2]')).to eq('bar') + + event_2 = output_queue.pop + expect(event_2.get('[key][1]')).to eq('hello') + expect(event_2.get('[key][2]')).to eq(nil) + end + end end end - describe "when maps column and values" do - let(:columns) { [{'name' => 'id'}, {'name' => 'val'}] } - let(:values) { %w[foo bar] } + describe "#column spec" do + let(:valid_spec) { { 'name' => 'field.name', 'type' => 'keyword' } } + let(:column_spec) { LogStash::Inputs::Elasticsearch::ColumnSpec.new(valid_spec) } + + context "when initializes" do + it "sets the name and type attributes" do + expect(column_spec.name).to eq("field.name") + expect(column_spec.type).to eq("keyword") + end + + it "freezes the name and type attributes" do + expect(column_spec.name).to be_frozen + expect(column_spec.type).to be_frozen + end + end - it "maps column names to 
their corresponding values" do - result = esql_executor.send(:map_column_and_values, columns, values) - expect(result).to eq({'id' => 'foo', 'val' => 'bar'}) + context "when calls the field reference" do + it "returns the correct field reference format" do + expect(column_spec.field_reference).to eq("[field][name]") + end end end end if LOGSTASH_VERSION >= LogStash::Inputs::Elasticsearch::LS_ESQL_SUPPORT_VERSION \ No newline at end of file diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index b7c97e00..bc404a51 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1370,7 +1370,7 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie client.transport.respond_to?(:transport) ? client.transport.transport : client.transport end - context "#ESQL" do + describe "#ESQL" do let(:config) do { "query" => "FROM test-index | STATS count() BY field", @@ -1473,38 +1473,6 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie end end end - - describe "with extra params" do - context "empty `query_params`" do - let(:config) { - super().merge('query_params' => {}) - } - - it "does not raise a configuration error" do - expect { plugin.send(:validate_query_params!) }.not_to raise_error - end - end - - context "with actual `drop_null_columns` value" do - let(:config) { - super().merge('query_params' => { 'drop_null_columns' => true }) - } - - it "does not raise a configuration error" do - expect { plugin.send(:validate_query_params!) }.not_to raise_error - end - end - - context "with extra non ES|QL params" do - let(:config) { - super().merge('query_params' => { 'drop_null_columns' => true, 'test' => 'hi'}) - } - - it "does not raise a configuration error" do - expect { plugin.send(:validate_query_params!) }.to raise_error(LogStash::ConfigurationError, "{\"test\"=>\"hi\"} not accepted when `response_type => 'esql'`") - end - end - end end end end From a92a71ef94c23f6717c9b7f1a7bc86fe13d6b10d Mon Sep 17 00:00:00 2001 From: Mashhur <99575341+mashhurs@users.noreply.github.com> Date: Tue, 6 May 2025 17:40:05 -0700 Subject: [PATCH 199/207] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rye Biesemeyer Co-authored-by: João Duarte --- docs/index.asciidoc | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 4b57ad0e..a7ea2c10 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -234,18 +234,12 @@ The next scheduled run: ==== ES|QL support {es} Query Language (ES|QL) provides a SQL-like interface for querying your {es} data. -To utilize the ES|QL feature with this plugin, the following version requirements must be met: -[cols="1,2",options="header"] -|=== -|Component |Minimum version -|{es} |8.11.0 or newer -|{ls} |8.17.4 or newer -|This plugin |4.23.0+ (4.x series) or 5.2.0+ (5.x series) +To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer. |=== -To configure ES|QL query in the plugin, set the `response_type` to `esql` and provide your ES|QL query in the `query` parameter. +To configure {esql} query in the plugin, set the `response_type` to `esql` and provide your {esql} query in the `query` parameter. 
-IMPORTANT: We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments. +IMPORTANT: {esql} is evolving and may still have limitations with regard to result size or supported field types. We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments. The following is a basic scheduled ES|QL query that runs hourly: [source, ruby] @@ -322,6 +316,8 @@ To illustrate the situation with example, assuming your mapping has a time `time } The ES|QL result will contain all three fields but the plugin cannot map them into {ls} event. + +This a common occurence if your template or mapping follows the pattern of always indexing strings as "text" (`field`) + " keyword" (`field.keyword`) multi-field. In this case it's recommended to do `KEEP field` if the string is identical and there is only one subfield as the engine will optimize and retrieve the keyword, otherwise you can do `KEEP field.keyword | RENAME field.keyword as field` . To avoid this, you can use the `RENAME` keyword to rename the `time` parent field to get all three fields with unique fields. [source, ruby] ... @@ -622,7 +618,7 @@ contents of the `aggregations` object of the query's response. In this case the 0 regardless of the default or user-defined value set in this plugin. When using the `esql` setting, the query must be a valid ES|QL string. -When this setting is active, `target`, `size`, `slices` and `search_api` parameters are ignored. +When this setting is active, `index`, `size`, `slices`, `search_api`, `docinfo`, `docinfo_target` and `docinfo_fields` parameters are not allowed. [id="plugins-{type}s-{plugin}-request_timeout_seconds"] ===== `request_timeout_seconds` From d4f559d4d6051855f0c0fe6923c90ba34f910693 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 6 May 2025 23:03:16 -0700 Subject: [PATCH 200/207] Apply code review suggestions: to use decorator as a proc call, doc syntax fix, unit test errors fix. --- docs/index.asciidoc | 1 - lib/logstash/inputs/elasticsearch.rb | 4 ++-- lib/logstash/inputs/elasticsearch/esql.rb | 4 ++-- spec/inputs/elasticsearch_esql_spec.rb | 4 ++-- spec/inputs/elasticsearch_spec.rb | 18 ++++++++++++++++++ 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index a7ea2c10..832341e6 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -235,7 +235,6 @@ The next scheduled run: {es} Query Language (ES|QL) provides a SQL-like interface for querying your {es} data. To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer. -|=== To configure {esql} query in the plugin, set the `response_type` to `esql` and provide your {esql} query in the `query` parameter. diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index c5f596eb..43d5592b 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -312,8 +312,8 @@ def register if @response_type == 'esql' validate_ls_version_for_esql_support! validate_esql_query! 
- ignored_options = original_params.keys & %w(index size slices search_api, docinfo, docinfo_target, docinfo_fields) - @logger.info("Configured #{ignored_options} params are ignored in ES|QL query") if ignored_options&.size > 1 + not_allowed_options = original_params.keys & %w(index size slices search_api, docinfo, docinfo_target, docinfo_fields) + raise(LogStash::ConfigurationError, "Configured #{not_allowed_options} params are not allowed while using ES|QL query") if not_allowed_options&.size > 1 else @base_query = LogStash::Json.load(@query) if @slices diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index a6b9ed2c..3a5ac280 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -17,7 +17,7 @@ class Esql # @param plugin [LogStash::Inputs::Elasticsearch] The parent plugin instance def initialize(client, plugin) @client = client - @plugin = plugin + @event_decorator = plugin.method(:decorate_event) @target_field = plugin.params["target"] @retries = plugin.params["retries"] @@ -80,7 +80,7 @@ def process_response(columns, values, output_queue) event.set(field_reference, ESQL_PARSERS_BY_TYPE[column.type].call(value)) end end - @plugin.decorate_event(event) + @event_decorator.call(event) output_queue << event rescue => e # if event creation fails with whatever reason, inform user and tag with failure and return entry as it is diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb index 1d6a12c5..6355db9d 100644 --- a/spec/inputs/elasticsearch_esql_spec.rb +++ b/spec/inputs/elasticsearch_esql_spec.rb @@ -6,7 +6,8 @@ describe LogStash::Inputs::Elasticsearch::Esql do let(:client) { instance_double(Elasticsearch::Client) } let(:esql_client) { double("esql-client") } - let(:plugin) { instance_double(LogStash::Inputs::Elasticsearch, params: plugin_config) } + + let(:plugin) { instance_double(LogStash::Inputs::Elasticsearch, params: plugin_config, decorate_event: nil) } let(:plugin_config) do { "query" => "FROM test-index | STATS count() BY field", @@ -19,7 +20,6 @@ it "sets up the ESQL client with correct parameters" do expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) expect(esql_executor.instance_variable_get(:@retries)).to eq(plugin_config["retries"]) - expect(esql_executor.instance_variable_get(:@plugin)).to eq(plugin) expect(esql_executor.instance_variable_get(:@target_field)).to eq(nil) end end diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index bc404a51..77703d21 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -1439,6 +1439,24 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie end end + context "ES|QL query and DSL params used together" do + let(:config) { + super().merge({ + "index" => "my-index", + "size" => 1, + "slices" => 1, + "search_api" => "auto", + "docinfo" => true, + "docinfo_target" => "[@metadata][docinfo]", + "docinfo_fields" => ["_index"], + })} + + it "raises a config error" do + mixed_fields = %w[index size slices docinfo_fields] + expect { plugin.register }.to raise_error(LogStash::ConfigurationError, /Configured #{mixed_fields} params are not allowed while using ES|QL query/) + end + end + describe "ES|QL query" do context "when query is valid" do it "does not raise an error" do From 65eb675c3bfe49dd5169be36ae95234b4e598573 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 8 May 2025 09:30:18 
-0700
Subject: [PATCH 201/207] Rename warning msg field name to avoid conflicts.
 Generate a target apply method to avoid null checks at runtime.

---
 lib/logstash/inputs/elasticsearch/esql.rb | 12 +++++++++---
 spec/inputs/elasticsearch_esql_spec.rb    |  2 +-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb
index 3a5ac280..9f95b4dd 100644
--- a/lib/logstash/inputs/elasticsearch/esql.rb
+++ b/lib/logstash/inputs/elasticsearch/esql.rb
@@ -18,9 +18,15 @@ class Esql
       def initialize(client, plugin)
         @client = client
         @event_decorator = plugin.method(:decorate_event)
-        @target_field = plugin.params["target"]
         @retries = plugin.params["retries"]
 
+        target_field = plugin.params["target"]
+        if target_field
+          def self.apply_target(path) = "[#{target_field}][#{path}]"
+        else
+          def self.apply_target(path) = path
+        end
+
         @query = plugin.params["query"]
         unless @query.include?('METADATA')
           logger.info("`METADATA` not found the query. `_id`, `_version` and `_index` will not be available in the result", {:query => @query})
@@ -40,7 +46,7 @@ def do_run(output_queue, query)
         return if response == false
 
         if response&.headers&.dig("warning")
-          logger.warn("ES|QL executor received warning", {:message => response.headers["warning"]})
+          logger.warn("ES|QL executor received warning", {:warning_message => response.headers["warning"]})
         end
         columns = response['columns']&.freeze
         values = response['values']&.freeze
@@ -76,7 +82,7 @@ def process_response(columns, values, output_queue)
             # `unless value.nil?` is a part of `drop_null_columns` that if some of columns' values are not `nil`, `nil` values appear
             # we should continuously filter out them to achieve full `drop_null_columns` on each individual row (ideal `LIMIT 1` result)
             unless value.nil?
-              field_reference = @target_field.nil? ? column.field_reference : "[#{@target_field}][#{column.field_reference}]"
+              field_reference = apply_target(column.field_reference)
               event.set(field_reference, ESQL_PARSERS_BY_TYPE[column.type].call(value))
             end
           end
diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb
index 6355db9d..dcc53c5b 100644
--- a/spec/inputs/elasticsearch_esql_spec.rb
+++ b/spec/inputs/elasticsearch_esql_spec.rb
@@ -69,7 +69,7 @@
 
       it "logs a warning if the response contains a warning header" do
         allow(response).to receive(:headers).and_return({ "warning" => "some warning" })
-        expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", { :message => "some warning" })
+        expect(esql_executor.logger).to receive(:warn).with("ES|QL executor received warning", { :warning_message => "some warning" })
         esql_executor.do_run(output_queue, plugin_config["query"])
       end
 

From 789f4676a3ffbc535bfa43d90fa328939c74a2ce Mon Sep 17 00:00:00 2001
From: Mashhur
Date: Thu, 8 May 2025 12:17:31 -0700
Subject: [PATCH 202/207] Ignore sub-fields with warnings and keep only
 parent.

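A minimal, standalone sketch of the sub-field detection idea this patch
implements (simplified from the new `mark_sub_elements` helper; the sample
column names below are invented):

[source, ruby]
    require 'set'

    names    = ['time', 'time.min', 'time.max', 'status']
    prefixes = Set.new

    # visiting shorter names first guarantees every possible parent is already
    # in the prefix set before any of its children is examined
    flags = names.sort_by(&:length).each_with_object({}) do |name, memo|
      parts   = name.split('.')
      parents = (0...parts.size - 1).map { |i| parts[0..i].join('.') }
      memo[name] = parents.any? { |p| prefixes.include?(p) }
      prefixes.add(name)
    end

    flags # => {"time"=>false, "status"=>false, "time.min"=>true, "time.max"=>true}

Sorting by length is what keeps the check at O(NlogN + N*K): a parent always
lands in the prefix set before any column nested under it is visited.
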
--- docs/index.asciidoc | 10 ++++--- lib/logstash/inputs/elasticsearch/esql.rb | 35 +++++++++++++++++++++-- spec/inputs/elasticsearch_esql_spec.rb | 32 +++++++++++++++++++-- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 832341e6..5f38d129 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -304,8 +304,12 @@ NOTE: If your index has a mapping with sub-objects where `status.code` and `stat ===== Conflict on multi-fields ES|QL query fetches all parent and sub-fields fields if your {es} index has https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] or https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/subobjects[subobjects]. -Since {ls} events cannot contain parent field's concrete value and sub-field values together, the plugin cannot map the result to {ls} event and produces `_elasticsearch_input_failure` tagged failed event. -We recommend using the `RENAME` (or `DROP`) keyword in your ES|QL query explicitly rename the fields to overcome this issue. +Since {ls} events cannot contain parent field's concrete value and sub-field values together, the plugin ignores sub-fields with warning and includes parent. +We recommend using the `RENAME` (or `DROP` to avoid warnings) keyword in your ES|QL query explicitly rename the fields to include sub-fields into the event. + +This a common occurrence if your template or mapping follows the pattern of always indexing strings as "text" (`field`) + " keyword" (`field.keyword`) multi-field. +In this case it's recommended to do `KEEP field` if the string is identical and there is only one subfield as the engine will optimize and retrieve the keyword, otherwise you can do `KEEP field.keyword | RENAME field.keyword as field`. + To illustrate the situation with example, assuming your mapping has a time `time` field with `time.min` and `time.max` sub-fields as following: [source, ruby] "properties": { @@ -315,8 +319,6 @@ To illustrate the situation with example, assuming your mapping has a time `time } The ES|QL result will contain all three fields but the plugin cannot map them into {ls} event. - -This a common occurence if your template or mapping follows the pattern of always indexing strings as "text" (`field`) + " keyword" (`field.keyword`) multi-field. In this case it's recommended to do `KEEP field` if the string is identical and there is only one subfield as the engine will optimize and retrieve the keyword, otherwise you can do `KEEP field.keyword | RENAME field.keyword as field` . To avoid this, you can use the `RENAME` keyword to rename the `time` parent field to get all three fields with unique fields. [source, ruby] ... 
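To make the conflict described in the docs above concrete, here is a
plain-Ruby sketch (the column names and values are invented) of why a scalar
parent value and its sub-fields cannot be nested into the same event
structure:

[source, ruby]
    columns = ['time', 'time.min']   # parent column followed by a sub-field column
    row     = [50, 1]

    event = {}
    columns.zip(row) do |name, value|
      *path, leaf = name.split('.')                      # 'time.min' => ['time'], 'min'
      scope = path.inject(event) { |s, k| s[k] ||= {} }  # second pass returns the scalar 50
      scope[leaf] = value
    end
    # => NoMethodError: the Integer 50 already stored at 'time' cannot take a 'min' key

The code change below resolves exactly this case by keeping the parent,
dropping the sub-fields, and emitting a warning instead of a failed event.
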
diff --git a/lib/logstash/inputs/elasticsearch/esql.rb b/lib/logstash/inputs/elasticsearch/esql.rb index 9f95b4dd..30afb723 100644 --- a/lib/logstash/inputs/elasticsearch/esql.rb +++ b/lib/logstash/inputs/elasticsearch/esql.rb @@ -22,9 +22,9 @@ def initialize(client, plugin) target_field = plugin.params["target"] if target_field - def self.apply_target(path) = "[#{target_field}][#{path}]" + def self.apply_target(path); "[#{target_field}][#{path}]"; end else - def self.apply_target(path) = path + def self.apply_target(path); path; end end @query = plugin.params["query"] @@ -77,11 +77,16 @@ def retryable(job_name, &block) # @param output_queue [Queue] The queue to push processed events to def process_response(columns, values, output_queue) column_specs = columns.map { |column| ColumnSpec.new(column) } + sub_element_mark_map = mark_sub_elements(column_specs) + multi_fields = sub_element_mark_map.filter_map { |key, val| key.name if val == true } + logger.warn("Multi-fields found in ES|QL result and they will not be available in the event. Please use `RENAME` command if you want to include them.", { :detected_multi_fields => multi_fields }) if multi_fields.any? + values.each do |row| event = column_specs.zip(row).each_with_object(LogStash::Event.new) do |(column, value), event| # `unless value.nil?` is a part of `drop_null_columns` that if some of columns' values are not `nil`, `nil` values appear # we should continuously filter out them to achieve full `drop_null_columns` on each individual row (ideal `LIMIT 1` result) - unless value.nil? + # we also exclude sub-elements of main field + if value && sub_element_mark_map[column] == false field_reference = apply_target(column.field_reference) event.set(field_reference, ESQL_PARSERS_BY_TYPE[column.type].call(value)) end @@ -95,6 +100,30 @@ def process_response(columns, values, output_queue) output_queue << failed_event end end + + # Determines whether each column in a collection is a nested sub-element (example "user.age") + # of another column in the same collection (example "user"). + # + # @param columns [Array] An array of objects with a `name` attribute representing field paths. + # @return [Hash] A hash mapping each column to `true` if it is a sub-element of another field, `false` otherwise. + # Time complexity: (O(NlogN+N*K)) where K is the number of conflict depth + # without (`prefix_set`) memoization, it would be O(N^2) + def mark_sub_elements(columns) + # Sort columns by name length (ascending) + sorted_columns = columns.sort_by { |c| c.name.length } + prefix_set = Set.new # memoization set + + sorted_columns.each_with_object({}) do |column, memo| + # Split the column name into parts (e.g., "user.profile.age" → ["user", "profile", "age"]) + parts = column.name.split('.') + + # Generate all possible parent prefixes (e.g., "user", "user.profile") + # and check if any parent prefix exists in the set + parent_prefixes = (0...parts.size - 1).map { |i| parts[0..i].join('.') } + memo[column] = parent_prefixes.any? 
{ |prefix| prefix_set.include?(prefix) }
+          prefix_set.add(column.name)
+        end
+      end
     end
 
     # Class representing a column specification in the ESQL response['columns']
diff --git a/spec/inputs/elasticsearch_esql_spec.rb b/spec/inputs/elasticsearch_esql_spec.rb
index dcc53c5b..f958dea3 100644
--- a/spec/inputs/elasticsearch_esql_spec.rb
+++ b/spec/inputs/elasticsearch_esql_spec.rb
@@ -54,7 +54,6 @@
       before do
         allow(esql_executor).to receive(:retryable).and_yield
         allow(client).to receive_message_chain(:esql, :query).and_return(response)
-        allow(plugin).to receive(:decorate_event)
       end
 
       it "executes the ESQL query and processes the results" do
@@ -87,7 +86,6 @@
       before do
         allow(esql_executor).to receive(:retryable).and_yield
         allow(client).to receive_message_chain(:esql, :query).and_return(response)
-        allow(plugin).to receive(:decorate_event)
         allow(response).to receive(:headers).and_return({})
       end
 
@@ -125,6 +123,36 @@
         end
       end
     end
+
+    context "when sub-elements occur in the result" do
+      let(:response) { {
+        'values' => [[50, 1, 100], [50, 0, 1000], [50, 9, 99999]],
+        'columns' =>
+          [
+            { 'name' => 'time', 'type' => 'long' },
+            { 'name' => 'time.min', 'type' => 'long' },
+            { 'name' => 'time.max', 'type' => 'long' },
+          ]
+      } }
+
+      before do
+        allow(esql_executor).to receive(:retryable).and_yield
+        allow(client).to receive_message_chain(:esql, :query).and_return(response)
+        allow(response).to receive(:headers).and_return({})
+      end
+
+      it "includes 1st depth elements into event" do
+        esql_executor.do_run(output_queue, plugin_config["query"])
+
+        expect(output_queue.size).to eq(3)
+        3.times do
+          event = output_queue.pop
+          expect(event.get('time')).to eq(50)
+          expect(event.get('[time][min]')).to eq(nil)
+          expect(event.get('[time][max]')).to eq(nil)
+        end
+      end
+    end
   end
 

From fefe6a070913c59293bce49cc0ff677c8bef1aa6 Mon Sep 17 00:00:00 2001
From: Mashhur
Date: Mon, 26 May 2025 11:03:01 -0700
Subject: [PATCH 203/207] Introduce `query_type` at high level, which other
 params such as `query`, etc. follow. It validates the shape to send a valid
 query type to the ES.

---
 docs/index.asciidoc                        | 46 +++++++++++--------
 lib/logstash/inputs/elasticsearch.rb       | 45 +++++++++---------
 spec/inputs/elasticsearch_spec.rb          |  8 ++--
 .../integration/elasticsearch_esql_spec.rb |  2 +-
 4 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 5f38d129..2ea3acd5 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -231,23 +231,23 @@ The next scheduled run:
 
 [id="plugins-{type}s-{plugin}-esql"]
-==== ES|QL support
-{es} Query Language (ES|QL) provides a SQL-like interface for querying your {es} data.
+==== {esql} support
+{es} Query Language ({esql}) provides a SQL-like interface for querying your {es} data.
 
 To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer.
 
-To configure {esql} query in the plugin, set the `response_type` to `esql` and provide your {esql} query in the `query` parameter.
+To configure {esql} query in the plugin, set the `query_type` to `esql` and provide your {esql} query in the `query` parameter.
 
 IMPORTANT: {esql} is evolving and may still have limitations with regard to result size or supported field types. We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments.
 
-The following is a basic scheduled ES|QL query that runs hourly: +The following is a basic scheduled {esql} query that runs hourly: [source, ruby] input { elasticsearch { id => hourly_cron_job hosts => [ 'https://../'] api_key => '....' - response_type => 'esql' + query_type => 'esql' query => ' FROM food-index | WHERE spicy_level = "hot" AND @timestamp > NOW() - 1 hour @@ -259,11 +259,11 @@ The following is a basic scheduled ES|QL query that runs hourly: Set `config.support_escapes: true` in `logstash.yml` if you need to escape special chars in the query. -NOTE: With ES|QL query, {ls} doesn't generate `event.original`. +NOTE: With {esql} query, {ls} doesn't generate `event.original`. [id="plugins-{type}s-{plugin}-esql-event-mapping"] -===== Mapping ES|QL result to {ls} event -ES|QL returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries). +===== Mapping {esql} result to {ls} event +{esql} returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries). The plugin maps each value entry to an event, populating corresponding fields. For example, a query might produce a table like: @@ -303,9 +303,9 @@ NOTE: If your index has a mapping with sub-objects where `status.code` and `stat [id="plugins-{type}s-{plugin}-esql-multifields"] ===== Conflict on multi-fields -ES|QL query fetches all parent and sub-fields fields if your {es} index has https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] or https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/subobjects[subobjects]. +{esql} query fetches all parent and sub-fields fields if your {es} index has https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] or https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/subobjects[subobjects]. Since {ls} events cannot contain parent field's concrete value and sub-field values together, the plugin ignores sub-fields with warning and includes parent. -We recommend using the `RENAME` (or `DROP` to avoid warnings) keyword in your ES|QL query explicitly rename the fields to include sub-fields into the event. +We recommend using the `RENAME` (or `DROP` to avoid warnings) keyword in your {esql} query explicitly rename the fields to include sub-fields into the event. This a common occurrence if your template or mapping follows the pattern of always indexing strings as "text" (`field`) + " keyword" (`field.keyword`) multi-field. In this case it's recommended to do `KEEP field` if the string is identical and there is only one subfield as the engine will optimize and retrieve the keyword, otherwise you can do `KEEP field.keyword | RENAME field.keyword as field`. @@ -318,14 +318,14 @@ To illustrate the situation with example, assuming your mapping has a time `time "time.max": { "type": "long" } } -The ES|QL result will contain all three fields but the plugin cannot map them into {ls} event. +The {esql} result will contain all three fields but the plugin cannot map them into {ls} event. To avoid this, you can use the `RENAME` keyword to rename the `time` parent field to get all three fields with unique fields. [source, ruby] ... query => 'FROM my-index | RENAME time AS time.current' ... -For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{es} ES|QL documentation]. 
+For comprehensive {esql} syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{esql} documentation]. [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Input configuration options @@ -354,7 +354,8 @@ Please check out <> for details. | <> |<>|No | <> |<>|No | <> |<>|No -| <> |<>, one of `["hits","aggregations","esql"]`|No +| <> |<>, one of `["dsl","esql"]`|No +| <> |<>, one of `["hits","aggregations"]`|No | <> | <>|No | <> |<>|No | <> |<>|No @@ -596,12 +597,22 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`. * Default value is `'{ "sort": [ "_doc" ] }'` The query to be executed. -Accepted query shape is DSL or ES|QL (when `response_type => 'esql'`). -Read the {ref}/query-dsl.html[{es} query DSL documentation] or {ref}/esql.html[{es} ES|QL documentation] for more information. +Accepted query shape is DSL or {esql} (when `query_type => 'esql'`). +Read the {ref}/query-dsl.html[{es} query DSL documentation] or {ref}/esql.html[{esql} documentation] for more information. When <> resolves to `search_after` and the query does not specify `sort`, the default sort `'{ "sort": { "_shard_doc": "asc" } }'` will be added to the query. Please refer to the {ref}/paginate-search-results.html#search-after[Elasticsearch search_after] parameter to know more. +[id="plugins-{type}s-{plugin}-query_type"] +===== `query_type` + +* Value can be `dsl` or `esql` +* Default value is `dsl` + +Defines the <> shape. +When `dsl`, the query shape must be valid {es} JSON-style string. +When `esql`, the query shape must be a valid {esql} string and `index`, `size`, `slices`, `search_api`, `docinfo`, `docinfo_target`, `docinfo_fields`, `response_type` and `tracking_field` parameters are not allowed. + [id="plugins-{type}s-{plugin}-response_type"] ===== `response_type` @@ -613,14 +624,11 @@ response from the query. The default `hits` will generate one event per returned document (i.e. "hit"). -When set to `aggregations`, a single Logstash event will be generated with the +When set to `aggregations`, a single {ls} event will be generated with the contents of the `aggregations` object of the query's response. In this case the `hits` object will be ignored. The parameter `size` will be always be set to 0 regardless of the default or user-defined value set in this plugin. -When using the `esql` setting, the query must be a valid ES|QL string. -When this setting is active, `index`, `size`, `slices`, `search_api`, `docinfo`, `docinfo_target` and `docinfo_fields` parameters are not allowed. - [id="plugins-{type}s-{plugin}-request_timeout_seconds"] ===== `request_timeout_seconds` diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb index 43d5592b..b7b477b8 100644 --- a/lib/logstash/inputs/elasticsearch.rb +++ b/lib/logstash/inputs/elasticsearch.rb @@ -97,18 +97,21 @@ class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base # The index or alias to search. config :index, :validate => :string, :default => "logstash-*" - # The query to be executed. DSL or ES|QL (when `response_type => 'esql'`) query shape is accepted. + # A type of Elasticsearch query, provided by @query. This will validate query shape and other params. + config :query_type, :validate => %w[dsl esql], :default => 'dsl' + + # The query to be executed. DSL or ES|QL (when `query_type => 'esql'`) query shape is accepted. 
# Read the following documentations for more info # Query DSL: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html config :query, :validate => :string, :default => '{ "sort": [ "_doc" ] }' - # This allows you to speccify the response type: one of [hits, aggregations, esql] + # This allows you to specify the DSL response type: one of [hits, aggregations] # where # hits: normal search request # aggregations: aggregation request - # esql: ES|QL request - config :response_type, :validate => %w[hits aggregations esql], :default => 'hits' + # Note that this param is invalid when `query_type => 'esql'`, ES|QL response shape is always a tabular format + config :response_type, :validate => %w[hits aggregations], :default => 'hits' # This allows you to set the maximum number of hits returned per scroll. config :size, :validate => :number, :default => 1000 @@ -309,10 +312,10 @@ def register fill_hosts_from_cloud_id setup_ssl_params! - if @response_type == 'esql' + if @query_type == 'esql' validate_ls_version_for_esql_support! validate_esql_query! - not_allowed_options = original_params.keys & %w(index size slices search_api, docinfo, docinfo_target, docinfo_fields) + not_allowed_options = original_params.keys & %w(index size slices search_api docinfo docinfo_target docinfo_fields response_type tracking_field) raise(LogStash::ConfigurationError, "Configured #{not_allowed_options} params are not allowed while using ES|QL query") if not_allowed_options&.size > 1 else @base_query = LogStash::Json.load(@query) @@ -361,7 +364,7 @@ def register setup_search_api - setup_query_executor + @query_executor = create_query_executor setup_cursor_tracker @@ -396,7 +399,7 @@ def decorate_event(event) private def get_query_object - return @query if @response_type == 'esql' + return @query if @query_type == 'esql' if @cursor_tracker query = @cursor_tracker.inject_cursor(@query) @logger.debug("new query is #{query}") @@ -685,20 +688,16 @@ def setup_search_api end - def setup_query_executor - @query_executor = case @response_type - when 'hits' - if @resolved_search_api == "search_after" - LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self) - else - logger.warn("scroll API is no longer recommended for pagination. Consider using search_after instead.") if es_major_version >= 8 - LogStash::Inputs::Elasticsearch::Scroll.new(@client, self) - end - when 'aggregations' - LogStash::Inputs::Elasticsearch::Aggregation.new(@client, self) - when 'esql' - LogStash::Inputs::Elasticsearch::Esql.new(@client, self) - end + def create_query_executor + return LogStash::Inputs::Elasticsearch::Esql.new(@client, self) if @query_type == 'esql' + + # DSL query executor + return LogStash::Inputs::Elasticsearch::Aggregation.new(@client, self) if @response_type == 'aggregations' + # response_type is hits, executor can be search_after or scroll type + return LogStash::Inputs::Elasticsearch::SearchAfter.new(@client, self) if @resolved_search_api == "search_after" + + logger.warn("scroll API is no longer recommended for pagination. Consider using search_after instead.") if es_major_version >= 8 + LogStash::Inputs::Elasticsearch::Scroll.new(@client, self) end def setup_cursor_tracker @@ -751,7 +750,7 @@ def validate_esql_query! end def validate_es_for_esql_support! 
-    return unless @response_type == 'esql'
+    return unless @query_type == 'esql'
     # make sure connected ES supports ES|QL (8.11+)
     es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION)
     fail("Connected Elasticsearch #{es_version} version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql
diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb
index 77703d21..02787dfe 100644
--- a/spec/inputs/elasticsearch_spec.rb
+++ b/spec/inputs/elasticsearch_spec.rb
@@ -1374,7 +1374,7 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie
     let(:config) do
       {
         "query" => "FROM test-index | STATS count() BY field",
-        "response_type" => "esql",
+        "query_type" => "esql",
         "retries" => 3
       }
     end
@@ -1387,8 +1387,8 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie
 
     describe "#initialize" do
       it "sets up the ESQL client with correct parameters" do
+        expect(plugin.instance_variable_get(:@query_type)).to eq(config["query_type"])
         expect(plugin.instance_variable_get(:@query)).to eq(config["query"])
-        expect(plugin.instance_variable_get(:@response_type)).to eq(config["response_type"])
         expect(plugin.instance_variable_get(:@retries)).to eq(config["retries"])
       end
     end
@@ -1449,10 +1449,12 @@ def extract_transport(client) # on 7.x client.transport is a ES::Transport::Clie
           "docinfo" => true,
           "docinfo_target" => "[@metadata][docinfo]",
           "docinfo_fields" => ["_index"],
+          "response_type" => "hits",
+          "tracking_field" => "[@metadata][tracking]"
         })}
 
       it "raises a config error" do
-        mixed_fields = %w[index size slices docinfo_fields]
+        mixed_fields = %w[index size slices docinfo_fields response_type tracking_field]
         expect { plugin.register }.to raise_error(LogStash::ConfigurationError, /Configured #{mixed_fields} params are not allowed while using ES|QL query/)
       end
     end
diff --git a/spec/inputs/integration/elasticsearch_esql_spec.rb b/spec/inputs/integration/elasticsearch_esql_spec.rb
index b27b8dd5..b25f65a5 100644
--- a/spec/inputs/integration/elasticsearch_esql_spec.rb
+++ b/spec/inputs/integration/elasticsearch_esql_spec.rb
@@ -23,7 +23,7 @@
   let(:config) do
     {
       "hosts" => ES_HOSTS,
-      "response_type" => "esql"
+      "query_type" => "esql"
     }
   end
   let(:es_client) do

From e108c87d94d013a9f481740bfed72919d6c5b005 Mon Sep 17 00:00:00 2001
From: Mashhur
Date: Wed, 28 May 2025 11:06:25 -0700
Subject: [PATCH 204/207] Add a tech preview for the ESQL.

---
 docs/index.asciidoc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/index.asciidoc b/docs/index.asciidoc
index 2ea3acd5..677b11ec 100644
--- a/docs/index.asciidoc
+++ b/docs/index.asciidoc
@@ -232,6 +232,13 @@ The next scheduled run:
 
 [id="plugins-{type}s-{plugin}-esql"]
 ==== {esql} support
+
+.Technical Preview
+****
+The {esql} feature that allows using ES|QL queries with this plugin is in Technical Preview.
+Configuration options and implementation details are subject to change in minor releases without being preceded by deprecation warnings.
+****
+
 {es} Query Language ({esql}) provides a SQL-like interface for querying your {es} data.
 To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer.

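For reference between these patches: the version gate the docs above describe
is enforced with a plain `Gem::Version` comparison against the plugin's
`ES_ESQL_SUPPORT_VERSION` constant. A small illustrative sketch, assuming the
constant is `'8.11.0'` per the docs (the helper name here is invented, not
the plugin's API):

[source, ruby]
    ES_ESQL_SUPPORT_VERSION = '8.11.0'

    # illustrative helper, not part of the plugin's API
    def es_supports_esql?(es_version)
      Gem::Version.create(es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION)
    end

    es_supports_esql?('8.10.4') # => false
    es_supports_esql?('8.11.2') # => true
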
From a11ee4a39593ce7a6a0789f3e975e1e8722b0b20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara?=
Date: Fri, 19 Sep 2025 09:32:36 +0200
Subject: [PATCH 205/207] Remove unavailable stack release versions and add
 9.x to CI integration tests

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 23a3c486..f2356fc9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,8 +10,8 @@ jobs:
     env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current
   - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.previous
   - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current
-  - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.next
-  - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.future
+  - env: INTEGRATION=true SNAPSHOT=false LOG_LEVEL=info ELASTIC_STACK_VERSION=9.current
+  - env: INTEGRATION=true SNAPSHOT=true LOG_LEVEL=info ELASTIC_STACK_VERSION=9.next
   - stage: "Secure Integration Tests"
     env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=8.current SNAPSHOT=true
   - env: SECURE_INTEGRATION=true INTEGRATION=true LOG_LEVEL=info ELASTIC_STACK_VERSION=7.current

From efcc00b674d3eaf32a61add3a9bfb9aadcd14163 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara=20Lara?=
Date: Tue, 23 Sep 2025 15:18:19 +0200
Subject: [PATCH 206/207] Support both encoded and non-encoded api-key formats
 on plugin configuration (#237)

* Support both encoded and non-encoded api-key formats on plugin configuration

* Solved code style issues

* Updated gemspec file
---
 CHANGELOG.md                         |  3 +++
 lib/logstash/inputs/elasticsearch.rb | 10 ++++++++--
 logstash-input-elasticsearch.gemspec |  2 +-
 spec/inputs/elasticsearch_spec.rb    | 26 ++++++++++++++++++++------
 4 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d63c5df0..bf4d4fac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+## 5.2.1
+- Added support for encoded and non encoded api-key formats on plugin configuration [#237](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/237)
+
 ## 5.2.0
 - ES|QL support [#233](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/233)
 
diff --git a/lib/logstash/inputs/elasticsearch.rb b/lib/logstash/inputs/elasticsearch.rb
index b7b477b8..c16b714d 100644
--- a/lib/logstash/inputs/elasticsearch.rb
+++ b/lib/logstash/inputs/elasticsearch.rb
@@ -563,12 +563,18 @@ def setup_basic_auth(user, password)
   end
 
   def setup_api_key(api_key)
-    return {} unless (api_key && api_key.value)
+    return {} unless (api_key&.value)
 
-    token = ::Base64.strict_encode64(api_key.value)
+    token = base64?(api_key.value) ? 
api_key.value : Base64.strict_encode64(api_key.value) { 'Authorization' => "ApiKey #{token}" } end + def base64?(string) + string == Base64.strict_encode64(Base64.strict_decode64(string)) + rescue ArgumentError + false + end + def prepare_user_agent os_name = java.lang.System.getProperty('os.name') os_version = java.lang.System.getProperty('os.version') diff --git a/logstash-input-elasticsearch.gemspec b/logstash-input-elasticsearch.gemspec index 976b3ef5..cf72d539 100644 --- a/logstash-input-elasticsearch.gemspec +++ b/logstash-input-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-input-elasticsearch' - s.version = '5.2.0' + s.version = '5.2.1' s.licenses = ['Apache License (2.0)'] s.summary = "Reads query results from an Elasticsearch cluster" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/inputs/elasticsearch_spec.rb b/spec/inputs/elasticsearch_spec.rb index 02787dfe..18bb4097 100644 --- a/spec/inputs/elasticsearch_spec.rb +++ b/spec/inputs/elasticsearch_spec.rb @@ -823,14 +823,28 @@ def synchronize_method!(object, method_name) end context "with ssl" do - let(:config) { super().merge({ 'api_key' => LogStash::Util::Password.new('foo:bar'), "ssl_enabled" => true }) } + let(:api_key_value) { nil } + let(:config) { super().merge("ssl_enabled" => true, 'api_key' => LogStash::Util::Password.new(api_key_value)) } + let(:encoded_api_key) { Base64.strict_encode64('foo:bar') } - it "should set authorization" do - plugin.register - client = plugin.send(:client) - auth_header = extract_transport(client).options[:transport_options][:headers]['Authorization'] + shared_examples "a plugin that sets the ApiKey authorization header" do + it "correctly sets the Authorization header" do + plugin.register + client = plugin.send(:client) + auth_header = extract_transport(client).options[:transport_options][:headers]['Authorization'] + + expect(auth_header).to eql("ApiKey #{encoded_api_key}") + end + end + + context "with a non-encoded API key" do + let(:api_key_value) { "foo:bar" } + it_behaves_like "a plugin that sets the ApiKey authorization header" + end - expect( auth_header ).to eql "ApiKey #{Base64.strict_encode64('foo:bar')}" + context "with an encoded API key" do + let(:api_key_value) { encoded_api_key } + it_behaves_like "a plugin that sets the ApiKey authorization header" end context 'user also set' do From db271e50e4d6a88a7cb210462eaa953812c5d5d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20C=C3=A1mara=20Lara?= Date: Tue, 23 Sep 2025 16:09:24 +0200 Subject: [PATCH 207/207] Fix Changelog indent for 5.2.1 version (#239) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf4d4fac..cd025e52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ ## 5.2.1 -- Added support for encoded and non encoded api-key formats on plugin configuration [#237](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/237) + - Added support for encoded and non encoded api-key formats on plugin configuration [#237](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/237) ## 5.2.0 - ES|QL support [#233](https://github.com/logstash-plugins/logstash-input-elasticsearch/pull/233)
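
A closing note on the api-key change in patch 206: detection of an
already-encoded key relies on a strict Base64 round-trip. A standalone sketch
of the same check (the sample keys are invented):

[source, ruby]
    require 'base64'

    # only a strict decode/encode round-trip counts as "already encoded";
    # raw keys such as 'id:api_key' raise ArgumentError and fall through
    def base64?(string)
      string == Base64.strict_encode64(Base64.strict_decode64(string))
    rescue ArgumentError
      false
    end

    base64?('my_id:my_key')                         # => false (raw id:key form)
    base64?(Base64.strict_encode64('my_id:my_key')) # => true

Note the trade-off in this heuristic: a raw key that happens to be valid
strict Base64 would be treated as already encoded, whereas the usual
`id:api_key` form can never be, since the colon is outside the Base64
alphabet.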