From 58b71ccaf1c5224ecb64d8645cfd26a3501615fd Mon Sep 17 00:00:00 2001
From: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com>
Date: Fri, 13 Sep 2024 10:57:51 -0400
Subject: [PATCH 01/38] [backport 8.16] Zero dimensional tensor (#2723)

* Change result writing to allow for a single value
* change to debug logging
* Wrap output in extra array to make 3D. Improve debug logging formatting
* Added unit test
* Add changelog
* formatting
* Fix reshaping of output tensor to only reshape zero-dimensional tensors
* Use std::move
* Backport fix by removing debug line; also remove std::move for reshape
* Update CHANGELOG.asciidoc
---
 bin/pytorch_inference/CResultWriter.cc | 3 +++
 bin/pytorch_inference/CResultWriter.h | 18 ++++++++++++++++++
 bin/pytorch_inference/Main.cc | 7 ++++++-
 .../unittest/CResultWriterTest.cc | 12 ++++++++++++
 docs/CHANGELOG.asciidoc | 4 ++++
 5 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/bin/pytorch_inference/CResultWriter.cc b/bin/pytorch_inference/CResultWriter.cc
index b4ca0baeb0..34389dad44 100644
--- a/bin/pytorch_inference/CResultWriter.cc
+++ b/bin/pytorch_inference/CResultWriter.cc
@@ -136,6 +136,9 @@ std::string CResultWriter::createInnerResult(const ::torch::Tensor& results) {
 case 2:
 this->writePrediction<2>(results, jsonWriter);
 break;
+ case 1:
+ this->writePrediction<1>(results, jsonWriter);
+ break;
 default: {
 std::ostringstream ss;
 ss << "Cannot convert results tensor of size [" << sizes << ']';
diff --git a/bin/pytorch_inference/CResultWriter.h b/bin/pytorch_inference/CResultWriter.h
index 037a2769f5..8d809dc9df 100644
--- a/bin/pytorch_inference/CResultWriter.h
+++ b/bin/pytorch_inference/CResultWriter.h
@@ -191,6 +191,24 @@ class CResultWriter : public TStringBufWriter {
 jsonWriter.onObjectEnd();
 }
+ //! Write a 1D inference result
+ template<typename T>
+ void writeInferenceResults(const ::torch::TensorAccessor<T, 1UL>& accessor,
+ TStringBufWriter& jsonWriter) {
+
+ jsonWriter.onKey(RESULT);
+ jsonWriter.onObjectBegin();
+ jsonWriter.onKey(INFERENCE);
+ // The Java side requires a 3D array, so wrap the 1D result in an
+ // extra outer array twice.
+ jsonWriter.onArrayBegin();
+ jsonWriter.onArrayBegin();
+ this->writeTensor(accessor, jsonWriter);
+ jsonWriter.onArrayEnd();
+ jsonWriter.onArrayEnd();
+ jsonWriter.onObjectEnd();
+ }
+
 private:
 core::CJsonOutputStreamWrapper m_WrappedOutputStream;
 };
diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
index 662810a48c..92db0aacec 100644
--- a/bin/pytorch_inference/Main.cc
+++ b/bin/pytorch_inference/Main.cc
@@ -73,7 +73,12 @@ torch::Tensor infer(torch::jit::script::Module& module_,
 // For transformers the result tensor is the first element in a tuple.
 all.push_back(output.toTuple()->elements()[0].toTensor());
 } else {
- all.push_back(output.toTensor());
+ auto outputTensor = output.toTensor();
+ if (outputTensor.dim() == 0) { // If the output is a scalar, we need to reshape it into a 1D tensor
+ all.push_back(outputTensor.reshape({1, 1}));
+ } else {
+ all.push_back(std::move(outputTensor));
+ }
 }
 inputs.clear();
diff --git a/bin/pytorch_inference/unittest/CResultWriterTest.cc b/bin/pytorch_inference/unittest/CResultWriterTest.cc
index 99333db8c3..97b99038a2 100644
--- a/bin/pytorch_inference/unittest/CResultWriterTest.cc
+++ b/bin/pytorch_inference/unittest/CResultWriterTest.cc
@@ -80,6 +80,18 @@ BOOST_AUTO_TEST_CASE(testCreateInnerInferenceResult) {
 BOOST_REQUIRE_EQUAL(expected, innerPortion);
 }
+BOOST_AUTO_TEST_CASE(testCreateInnerInferenceResultFor1DimensionalResult) {
+ std::ostringstream output;
+ ml::torch::CResultWriter resultWriter{output};
+ ::torch::Tensor tensor{::torch::ones({1})};
+ std::string innerPortion{resultWriter.createInnerResult(tensor)};
+ std::string expected = "\"result\":{\"inference\":"
+ "[[[1]]]}";
+ LOG_INFO(<< "expected: " << expected);
+ LOG_INFO(<< "actual: " << innerPortion);
+ BOOST_REQUIRE_EQUAL(expected, innerPortion);
+}
+
 BOOST_AUTO_TEST_CASE(testWrapAndWriteInferenceResult) {
 std::string innerPortion{
 "\"result\":{\"inference\":"
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 6f98ce1ff8..9c1d528166 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -35,6 +35,10 @@
 * Allow the user to force a detector to shift time series state by a specific
 amount. (See {ml-pull}2695[#2695].)
+=== Bug Fixes
+
+* Allow for pytorch_inference results to include zero-dimensional tensors.
+
 == {es} version 8.15.2
 === Enhancements

From fc5d248911a1a969b2e136098397fbad6efcd8e7 Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Thu, 19 Sep 2024 15:39:44 +1200
Subject: [PATCH 02/38] [8.16][ML] Fix failing testFileIoIsGood test on mac
x86 (#2728) (#2732)

Bump a test thread timeout to increase the chance of successfully reading
from a test file on slower machines.

Prior to this change the `CIoManagerTest/testFileIoIsGood` test had been
failing approximately 5% to 15% of the time.

Testing the change with this script
```
testFailures=0; for i in {1..1000}; do ../../../cmake-build-relwithdebinfo/test/lib/api/unittest/ml_test_api --run_test=CIoManagerTest/testFileIoGood; if [ $? != 0 ]; then ((++testFailures)); fi; done; echo $testFailures
```
echoes `0` upon completion.

Backports #2728
---
 lib/api/unittest/CIoManagerTest.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/api/unittest/CIoManagerTest.cc b/lib/api/unittest/CIoManagerTest.cc
index fcbdd1f763..1c99ebb46e 100644
--- a/lib/api/unittest/CIoManagerTest.cc
+++ b/lib/api/unittest/CIoManagerTest.cc
@@ -29,7 +29,7 @@ BOOST_AUTO_TEST_SUITE(CIoManagerTest)
 namespace {
 const std::uint32_t SLEEP_TIME_MS{100};
-const std::uint32_t PAUSE_TIME_MS{10};
+const std::uint32_t PAUSE_TIME_MS{40};
 const std::size_t MAX_ATTEMPTS{100};
 const std::size_t TEST_SIZE{10000};
 const char TEST_CHAR{'a'};

From fbf586214cf6a0476b81e0edd1e7725a672b6d70 Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Thu, 19 Sep 2024 17:20:32 +1200
Subject: [PATCH 03/38] [8.16][ML] Correct home for macOS x86_64 test results
(#2725) (#2735)

Ensure that macOS x86_64 unit test results are stored correctly in
Buildkite test analytics.
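As a sanity check, the mapping can also be exercised by hand with a sketch
like the following (a hypothetical shell session: it assumes `vault` access
to the CI secrets and reuses the path and `awk` filter from the hook below):

```
# Illustration only: mimic the hook's branch for the new macOS x86_64 step key.
export BUILDKITE_STEP_KEY="build_test_macos-x86_64-RelWithDebInfo"
vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_x86_64 | awk '/^token/ {print $2;}'
```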
Backports #2725 --- .buildkite/hooks/post-checkout | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.buildkite/hooks/post-checkout b/.buildkite/hooks/post-checkout index 7ed9a76c55..2833da5326 100644 --- a/.buildkite/hooks/post-checkout +++ b/.buildkite/hooks/post-checkout @@ -21,6 +21,8 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == ml-cpp* ]]; then export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/linux_aarch64 | awk '/^token/ {print $2;}') elif [[ "$BUILDKITE_STEP_KEY" == "build_test_macos-aarch64-RelWithDebInfo" ]]; then export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_aarch64 | awk '/^token/ {print $2;}') + elif [[ "$BUILDKITE_STEP_KEY" == "build_test_macos-x86_64-RelWithDebInfo" ]]; then + export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_x86_64 | awk '/^token/ {print $2;}') else [[ "$BUILDKITE_STEP_KEY" == "build_test_Windows-x86_64-RelWithDebInfo" ]] export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/windows_x86_64 | awk '/^token/ {print $2;}') fi From 0bc01ba0e12e9bc8c634d096e79f740df621bf80 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:01:24 +0200 Subject: [PATCH 04/38] [ML] Change time shift sign for intuitive configuration (#2737) (#2743) Backport for #2737 --- lib/model/CDetectionRule.cc | 7 ++++++- lib/model/unittest/CDetectionRuleTest.cc | 8 ++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/model/CDetectionRule.cc b/lib/model/CDetectionRule.cc index 0ad0c21d76..c6f8dd3531 100644 --- a/lib/model/CDetectionRule.cc +++ b/lib/model/CDetectionRule.cc @@ -87,7 +87,12 @@ void CDetectionRule::addTimeShift(core_t::TTime timeShift) { timeShiftApplied.end()) { // When the callback is executed, the model is already in the correct time // interval. Hence, we need to shift the time right away. - model.shiftTime(time, timeShift); + // IMPLEMENTATION DECISION: We apply the negative amount of time shift to the + // model. This is because the time shift is applied to the model's frame of reference + // and not the global time. This allows a more intuitive configuration from the user's + // perspective: in spring we move the clock forward, and the time shift is positive, in + // autumn we move the clock backward, and the time shift is negative. + model.shiftTime(time, -timeShift); timeShiftApplied.emplace_back(&model); } }); diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc index 09c408790e..ec41d0a85c 100644 --- a/lib/model/unittest/CDetectionRuleTest.cc +++ b/lib/model/unittest/CDetectionRuleTest.cc @@ -1020,8 +1020,8 @@ BOOST_FIXTURE_TEST_CASE(testRuleTimeShiftShouldShiftTimeSeriesModelState, CTestF rule.executeCallback(*model, timestamp); // the time series model should have been shifted by specified amount. 
- BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTime + timeShiftInSecs);
- BOOST_TEST_REQUIRE(trendModel.timeShift() == timeShiftInSecs);
+ BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTime - timeShiftInSecs);
+ BOOST_TEST_REQUIRE(trendModel.timeShift() == -timeShiftInSecs);
 // and an annotation should have been added to the model
 BOOST_TEST_REQUIRE(annotations.size() == numAnnotationsBeforeShift + 1);
@@ -1091,8 +1091,8 @@ BOOST_FIXTURE_TEST_CASE(testTwoTimeShiftRuleShouldShiftTwice, CTestFixture) {
 // the values after the second time should be the sum of two rules.
 timestamp += timeShift1; // simulate the time has moved forward by the time shift
 rule2.executeCallback(*model, timestamp);
- BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTimeAfterFirstShift + timeShift2);
- BOOST_TEST_REQUIRE(trendModel.timeShift() == timeShift1 + timeShift2);
+ BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTimeAfterFirstShift - timeShift2);
+ BOOST_TEST_REQUIRE(trendModel.timeShift() == -(timeShift1 + timeShift2));
 }

 BOOST_AUTO_TEST_SUITE_END()

From f240a162613884c08fc89489a41548c962dc7ee6 Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Wed, 25 Sep 2024 13:22:38 +1200
Subject: [PATCH 05/38] [8.16][ML] Fudge for CKMostCorrelatedTest/testScale
(#2729) (#2744)

Add a bit of tolerance for a CKMostCorrelatedTest/testScale comparison.

This _should_ reduce the frequency of failures on macOS x86_64 CI builds
that are a bit slooooower than the rest.

Backports #2729
---
 lib/maths/common/unittest/CKMostCorrelatedTest.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/maths/common/unittest/CKMostCorrelatedTest.cc b/lib/maths/common/unittest/CKMostCorrelatedTest.cc
index 1cc23fbf90..5b8fd373d6 100644
--- a/lib/maths/common/unittest/CKMostCorrelatedTest.cc
+++ b/lib/maths/common/unittest/CKMostCorrelatedTest.cc
@@ -764,7 +764,7 @@ BOOST_AUTO_TEST_CASE(testScale) {
 double sdRatio = std::sqrt(maths::common::CBasicStatistics::variance(slope)) /
 maths::common::CBasicStatistics::mean(slope);
 LOG_DEBUG(<< "sdRatio = " << sdRatio);
- BOOST_TEST_REQUIRE(exponent < 2.0);
+ BOOST_TEST(exponent <= 2.0, boost::test_tools::tolerance(0.1));
 BOOST_TEST_REQUIRE(sdRatio < 0.75);
 }

From 393ff55b50e10d2d9e1097e9ff8c98e8bf7addc5 Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Wed, 25 Sep 2024 13:24:43 +1200
Subject: [PATCH 06/38] [8.16][ML] Add CMake config to integrate gperftools
(#2713) (#2750)

Add options to link in libprofiler and/or libtcmalloc to autodetect and
pytorch_inference.

For Linux builds only. Not for production releases.
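A minimal usage sketch (assumptions: an out-of-source CMake build on Linux
with gperftools installed where the linker can find it; the option names and
the `autodetect`/`pytorch_inference` targets come from the diffs below, the
build directory name is illustrative):

```
# Not for production artifacts: link in tcmalloc and the CPU profiler.
cmake -B cmake-build-profiling -D LINK_TCMALLOC=ON -D LINK_PROFILER=ON .
cmake --build cmake-build-profiling --target autodetect pytorch_inference
```

Passing the options at configure time matches the top-level CMakeLists.txt
change, which consumes the cache variables and then unsets them at the end
of the run.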
Backports #2713 --- CMakeLists.txt | 28 ++++++++++++++++++++++++++++ bin/autodetect/CMakeLists.txt | 10 ++++++++++ bin/pytorch_inference/CMakeLists.txt | 10 ++++++++++ 3 files changed, 48 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ea9df4cec3..0aeb7a04fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,6 +69,26 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") install(FILES ${CMAKE_BINARY_DIR}/Info.plist DESTINATION ${CMAKE_INSTALL_PREFIX}) endif() +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + if(NOT LINK_TCMALLOC) + set(LINK_TCMALLOC FALSE) + endif() + if(NOT LINK_PROFILER) + set(LINK_PROFILER FALSE) + endif() +else() + if(LINK_TCMALLOC) + message(WARNING "Not linking libtcmalloc on ${CMAKE_SYSTEM_NAME}") + set(LINK_TCMALLOC FALSE) + unset(LINK_TCMALLOC CACHE) + endif() + if(LINK_PROFILER) + message(WARNING "Not linking libprofiler on ${CMAKE_SYSTEM_NAME}") + set(LINK_PROFILER FALSE) + unset(LINK_PROFILER CACHE) + endif() +endif() + message(STATUS "CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}") include_directories(SYSTEM ${ML_SYSTEM_INCLUDE_DIRECTORIES}) @@ -87,3 +107,11 @@ add_subdirectory(devlib) # Add a target to build Doxygen generated documentation # if the doxygen executable can be found ml_doxygen(${CMAKE_SOURCE_DIR}/build/doxygen) + +if (LINK_TCMALLOC) + unset(LINK_TCMALLOC CACHE) +endif() + +if (LINK_PROFILER) + unset(LINK_PROFILER CACHE) +endif() diff --git a/bin/autodetect/CMakeLists.txt b/bin/autodetect/CMakeLists.txt index 970095c0b7..93af5e9201 100644 --- a/bin/autodetect/CMakeLists.txt +++ b/bin/autodetect/CMakeLists.txt @@ -22,6 +22,16 @@ set(ML_LINK_LIBRARIES MlVer ) +if (LINK_TCMALLOC) + message(AUTHOR_WARNING "Linking libtcmalloc. Build is not for production release.") + list(APPEND ML_LINK_LIBRARIES tcmalloc) +endif () + +if (LINK_PROFILER) + message(AUTHOR_WARNING "Linking libprofiler. Build is not for production release.") + list(APPEND ML_LINK_LIBRARIES profiler) +endif () + ml_add_executable(autodetect CCmdLineParser.cc ) diff --git a/bin/pytorch_inference/CMakeLists.txt b/bin/pytorch_inference/CMakeLists.txt index 62a4f3defd..5c6ff63528 100644 --- a/bin/pytorch_inference/CMakeLists.txt +++ b/bin/pytorch_inference/CMakeLists.txt @@ -21,6 +21,16 @@ set(ML_LINK_LIBRARIES ${C10_LIB} ) +if (LINK_TCMALLOC) + message(AUTHOR_WARNING "Linking libtcmalloc. Build is not for production release.") + list(APPEND ML_LINK_LIBRARIES tcmalloc) +endif () + +if (LINK_PROFILER) + message(AUTHOR_WARNING "Linking libprofiler. Build is not for production release.") + list(APPEND ML_LINK_LIBRARIES profiler) +endif () + ml_add_executable(pytorch_inference CBufferedIStreamAdapter.cc CCmdLineParser.cc From b9664f95a5d7d8e91238651ebb091d60b34c0c19 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 25 Sep 2024 13:44:41 +1200 Subject: [PATCH 07/38] [8.16][ML] Trigger a "staging" build on merge to branch (#2717) (#2752) Ensure that code changes that are merged to active branches are automatically incorporated into a build. 
Backports #2717 --- catalog-info.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalog-info.yaml b/catalog-info.yaml index e1105881dd..cb3098d8a4 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -219,7 +219,7 @@ spec: ' filter_enabled: true - trigger_mode: none + trigger_mode: code repository: elastic/ml-cpp skip_intermediate_builds: true teams: From a12f4f7df89d5c6544f73fdb96b673eeb35fc1f6 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 25 Sep 2024 14:53:59 +1200 Subject: [PATCH 08/38] [8.16][ML] Fix a few address sanitizer issues (#2738) (#2745) Fix a few issues picked up by the address sanitizer. These are mostly related to unit tests, including one that leads to an occasional crash on Windows - #2651, and also: * Fix a heap-buffer-overflow in test case Address Sanitizer picked up a heap-buffer-overflow in `CBoostedTreeTest/testEdgeCases`. Rework the test slightly to avoid it. * Optimize code when running the Address Sanitizer Add the `O3` flag to those used for compiling with the Address Sanitizer enabled. In most cases this is sufficient and cuts the run time significantly. Backports #2738 --- cmake/variables.cmake | 6 +++--- lib/api/unittest/CJsonOutputWriterTest.cc | 5 +++-- lib/maths/analytics/unittest/CBoostedTreeTest.cc | 4 ++-- lib/maths/time_series/CTrendComponent.cc | 13 +++++++++---- .../time_series/unittest/CCalendarCyclicTestTest.cc | 2 +- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/cmake/variables.cmake b/cmake/variables.cmake index a442121ce5..a1f557dd67 100644 --- a/cmake/variables.cmake +++ b/cmake/variables.cmake @@ -160,7 +160,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") set(CMAKE_CXX_FLAGS_RELEASE "/O2 /D NDEBUG /D EXCLUDE_TRACE_LOGGING /Qfast_transcendentals /Qvec-report:1") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /O2 /D NDEBUG /D EXCLUDE_TRACE_LOGGING /Qfast_transcendentals /Qvec-report:1") set(CMAKE_CXX_FLAGS_DEBUG "/Zi /Od /RTC1") - set(CMAKE_CXX_FLAGS_SANITIZER "/fsanitize=address /Zi" CACHE STRING + set(CMAKE_CXX_FLAGS_SANITIZER "/fsanitize=address /O2 /Zi" CACHE STRING "Flags used by the C++ compiler during sanitizer builds." FORCE) set(CMAKE_EXE_LINKER_FLAGS_SANITIZER "") @@ -173,7 +173,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING -Wdisabled-optimization -D_FORTIFY_SOURCE=2") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING -Wdisabled-optimization -D_FORTIFY_SOURCE=2") set(CMAKE_CXX_FLAGS_DEBUG "-g") - set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -fno-omit-frame-pointer" CACHE STRING + set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -O3 -fno-omit-frame-pointer" CACHE STRING "Flags used by the C++ compiler during sanitizer builds." FORCE) endif() @@ -182,7 +182,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING") set(CMAKE_CXX_FLAGS_DEBUG "-g") - set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -fno-omit-frame-pointer" CACHE STRING + set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -O3 -fno-omit-frame-pointer" CACHE STRING "Flags used by the C++ compiler during sanitizer builds." 
FORCE) mark_as_advanced( diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index a41d0dccf1..a88803e221 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1010,6 +1010,7 @@ BOOST_AUTO_TEST_CASE(testGeoResultsWrite) { std::string functionDescription("lat_long(location)"); ml::api::CHierarchicalResultsWriter::TOptionalStrOptionalStrPrDoublePrVec influences; std::string emptyString; + std::string mean_function("mean"); // The output writer won't close the JSON structures until is is destroyed { std::ostringstream sstream; @@ -1099,8 +1100,8 @@ BOOST_AUTO_TEST_CASE(testGeoResultsWrite) { ml::api::CHierarchicalResultsWriter::SResults result( ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, - correlatedByFieldValue, 1, "mean", functionDescription, 2.24, - 79, typical, actual, 10.0, 10.0, 0.5, 0.0, fieldName, + correlatedByFieldValue, 1, mean_function, functionDescription, + 2.24, 79, typical, actual, 10.0, 10.0, 0.5, 0.0, fieldName, influences, false, true, 1, 1, EMPTY_STRING_LIST, {}); BOOST_TEST_REQUIRE(writer.acceptResult(result)); BOOST_TEST_REQUIRE(writer.endOutputBatch(false, 1U)); diff --git a/lib/maths/analytics/unittest/CBoostedTreeTest.cc b/lib/maths/analytics/unittest/CBoostedTreeTest.cc index 3161800b97..dbed0b6653 100644 --- a/lib/maths/analytics/unittest/CBoostedTreeTest.cc +++ b/lib/maths/analytics/unittest/CBoostedTreeTest.cc @@ -488,8 +488,8 @@ BOOST_AUTO_TEST_CASE(testEdgeCases) { auto frame = core::makeMainStorageDataFrame(cols).first; - fillDataFrame(5, 0, 2, {{1.0}, {1.0}, {1.0}, {1.0}, {1.0}}, - {0.0, 0.0, 0.0, 0.0, 0.0}, [](const TRowRef&) { return 1.0; }, *frame); + fillDataFrame(5, 0, 2, {{1.0, 1.0, 1.0, 1.0, 1.0}}, {0.0, 0.0, 0.0, 0.0, 0.0}, + [](const TRowRef&) { return 1.0; }, *frame); BOOST_REQUIRE_NO_THROW(maths::analytics::CBoostedTreeFactory::constructFromParameters( 1, std::make_unique()) diff --git a/lib/maths/time_series/CTrendComponent.cc b/lib/maths/time_series/CTrendComponent.cc index 78f9a27216..caaaa288bb 100644 --- a/lib/maths/time_series/CTrendComponent.cc +++ b/lib/maths/time_series/CTrendComponent.cc @@ -310,10 +310,15 @@ void CTrendComponent::shiftLevel(double shift, double magnitude{shifts[last] - shifts[next - 1]}; if (m_TimeOfLastLevelChange != UNSET_TIME) { double dt{static_cast(time - m_TimeOfLastLevelChange)}; - double value{static_cast( - common::CBasicStatistics::mean(values[segments[next] - 1]))}; - m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, - {{dt}, {value}}); + if (values.size() > segments[next] - 1) { + double value{static_cast( + common::CBasicStatistics::mean(values[segments[next] - 1]))}; + m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, + {{dt}, {value}}); + } else { + LOG_DEBUG(<< "Size mis-match reading from values. 
Length = " + << values.size() << ", requested index = " << segments[next] - 1); + } } m_TimeOfLastLevelChange = time; for (std::size_t i = segments[last]; i < values.size(); ++i, time += bucketLength) { diff --git a/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc b/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc index 4609953e33..e6a791293b 100644 --- a/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc +++ b/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc @@ -528,7 +528,7 @@ BOOST_AUTO_TEST_CASE(testLongBuckets) { TDoubleVec error; for (core_t::TTime time = 0, i = 0; time <= end; time += DAY) { rng.generateNormalSamples(0.0, 9.0, 1, error); - if (time >= months[i] && time < months[i] + DAY) { + if (time >= months[i] && time < months[i] + DAY && i < months.size() - 1) { error[0] += 20.0; ++i; } From e61db8cc456a9240d4f777b38bbc6ad8dba86e80 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:33:58 +0200 Subject: [PATCH 09/38] [ML] Persist callback detection rules in the detector model state (#2739) (#2754) Backport for #2739 --- include/core/CFlatPrefixTree.h | 4 + include/core/CPatternSet.h | 2 + include/model/CAnomalyDetectorModel.h | 14 ++++ include/model/CDetectionRule.h | 9 +++ include/model/CPopulationModel.h | 2 - include/model/CRuleCondition.h | 2 + include/model/CRuleScope.h | 3 + lib/core/CFlatPrefixTree.cc | 20 +++++ lib/core/CPatternSet.cc | 12 +++ lib/model/CAnomalyDetectorModel.cc | 35 +++++++++ lib/model/CCountingModel.cc | 9 +++ lib/model/CDetectionRule.cc | 62 +++++++++++----- lib/model/CIndividualModel.cc | 5 ++ lib/model/CPopulationModel.cc | 13 +++- lib/model/CRuleCondition.cc | 9 +++ lib/model/CRuleScope.cc | 12 +++ lib/model/unittest/CCountingModelTest.cc | 42 +++++++++++ lib/model/unittest/CDetectionRuleTest.cc | 95 ++++++++++++++++++++++++ 18 files changed, 329 insertions(+), 21 deletions(-) diff --git a/include/core/CFlatPrefixTree.h b/include/core/CFlatPrefixTree.h index 7addcfed58..fc065e0eda 100644 --- a/include/core/CFlatPrefixTree.h +++ b/include/core/CFlatPrefixTree.h @@ -63,6 +63,8 @@ class CORE_EXPORT CFlatPrefixTree { SNode(char c, char type, std::uint32_t next); + std::uint64_t checksum() const; + bool operator<(char rhs) const; char s_Char; char s_Type; @@ -120,6 +122,8 @@ class CORE_EXPORT CFlatPrefixTree { //! Pretty-prints the tree. std::string print() const; + std::uint64_t checksum() const; + private: //! The recursive building helper. void buildRecursively(const TStrVec& prefixes, diff --git a/include/core/CPatternSet.h b/include/core/CPatternSet.h index 8b00d6564d..a3d1e0aa04 100644 --- a/include/core/CPatternSet.h +++ b/include/core/CPatternSet.h @@ -61,6 +61,8 @@ class CORE_EXPORT CPatternSet { //! Clears the set. void clear(); + std::uint64_t checksum() const; + private: void sortAndPruneDuplicates(TStrVec& keys); diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h index 2e760151ed..cb6aa71a30 100644 --- a/include/model/CAnomalyDetectorModel.h +++ b/include/model/CAnomalyDetectorModel.h @@ -492,11 +492,19 @@ class MODEL_EXPORT CAnomalyDetectorModel { //! Apply time shift at the time \p time by \p shift amount of seconds. virtual void shiftTime(core_t::TTime time, core_t::TTime shift) = 0; + //! Check if the rule has been applied. + bool checkRuleApplied(const CDetectionRule& rule) const; + + //! Mark the rule as applied. 
+ void markRuleApplied(const CDetectionRule& rule); + protected: using TStrCRef = std::reference_wrapper; using TSizeSize1VecUMap = boost::unordered_map; using TFeatureSizeSize1VecUMapPr = std::pair; using TFeatureSizeSize1VecUMapPrVec = std::vector; + using TUint64TTimePr = std::pair; + using TUint64TTimePrVec = std::vector; //! \brief The feature models. struct MODEL_EXPORT SFeatureModels { @@ -710,6 +718,9 @@ class MODEL_EXPORT CAnomalyDetectorModel { CAnnotation::EEvent type, const std::string& annotation) = 0; + TUint64TTimePrVec& appliedRuleChecksums(); + const TUint64TTimePrVec& appliedRuleChecksums() const; + private: using TModelParamsCRef = std::reference_wrapper; @@ -738,6 +749,9 @@ class MODEL_EXPORT CAnomalyDetectorModel { //! The influence calculators to use for each feature which is being //! modeled. TFeatureInfluenceCalculatorCPtrPrVecVec m_InfluenceCalculators; + + //! Checksums of the rules that should be applied only once. + TUint64TTimePrVec m_AppliedRuleChecksums; }; class CMemoryCircuitBreaker : public core::CMemoryCircuitBreaker { diff --git a/include/model/CDetectionRule.h b/include/model/CDetectionRule.h index f7dd4f0a8b..4bf3dd0165 100644 --- a/include/model/CDetectionRule.h +++ b/include/model/CDetectionRule.h @@ -65,6 +65,9 @@ class MODEL_EXPORT CDetectionRule { //! Add a condition. void addCondition(const CRuleCondition& condition); + //! Clear conditions. + void clearConditions(); + //! Set callback function to apply some action to a supplied time series model. void setCallback(TCallback cb); @@ -88,6 +91,9 @@ class MODEL_EXPORT CDetectionRule { //! Pretty-print the rule. std::string print() const; + //! Checksum the rule. + std::uint64_t checksum() const; + private: std::string printAction() const; @@ -105,6 +111,9 @@ class MODEL_EXPORT CDetectionRule { //! Callback function to apply a change to a model based on the rule action. TCallback m_Callback; + + //! The time shift to apply to the model. + core_t::TTime m_TimeShift{0}; }; } } diff --git a/include/model/CPopulationModel.h b/include/model/CPopulationModel.h index 347f5509bd..979fbc35e0 100644 --- a/include/model/CPopulationModel.h +++ b/include/model/CPopulationModel.h @@ -23,8 +23,6 @@ #include #include -#include -#include #include #include diff --git a/include/model/CRuleCondition.h b/include/model/CRuleCondition.h index db675760e1..d7b3937e4b 100644 --- a/include/model/CRuleCondition.h +++ b/include/model/CRuleCondition.h @@ -73,6 +73,8 @@ class MODEL_EXPORT CRuleCondition { std::size_t cid, core_t::TTime time) const; + std::uint64_t checksum() const; + private: bool testValue(double value) const; std::string print(ERuleConditionAppliesTo appliesTo) const; diff --git a/include/model/CRuleScope.h b/include/model/CRuleScope.h index 8cf5444c81..0ea0a47506 100644 --- a/include/model/CRuleScope.h +++ b/include/model/CRuleScope.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -58,6 +59,8 @@ class MODEL_EXPORT CRuleScope { //! Pretty-print the scope. std::string print() const; + std::uint64_t checksum() const; + private: //! A vector that holds the triple of the field, filter and its type. 
TStrPatternSetCRefFilterTypeTrVec m_Scope; diff --git a/lib/core/CFlatPrefixTree.cc b/lib/core/CFlatPrefixTree.cc index 56f6dbdcfc..f36489346e 100644 --- a/lib/core/CFlatPrefixTree.cc +++ b/lib/core/CFlatPrefixTree.cc @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -226,5 +227,24 @@ std::string CFlatPrefixTree::print() const { result += "]"; return result; } + +std::uint64_t CFlatPrefixTree::SNode::checksum() const { + std::uint64_t result{0}; + result = CHashing::hashCombine(result, static_cast(s_Char)); + result = CHashing::hashCombine(result, static_cast(s_Type)); + result = CHashing::hashCombine(result, static_cast(s_Next)); + return result; +} + +std::uint64_t CFlatPrefixTree::checksum() const { + std::uint64_t result{0}; + + // Iterate over m_FlatTree + for (const auto& node : m_FlatTree) { + result = CHashing::hashCombine(result, node.checksum()); + } + + return result; +} } } diff --git a/lib/core/CPatternSet.cc b/lib/core/CPatternSet.cc index 393d6796f8..c4cf3b6275 100644 --- a/lib/core/CPatternSet.cc +++ b/lib/core/CPatternSet.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -153,5 +154,16 @@ void CPatternSet::clear() { m_SuffixPatterns.clear(); m_ContainsPatterns.clear(); } + +std::uint64_t CPatternSet::checksum() const { + std::uint64_t result{0}; + + result = CHashing::hashCombine(result, m_FullMatchPatterns.checksum()); + result = CHashing::hashCombine(result, m_PrefixPatterns.checksum()); + result = CHashing::hashCombine(result, m_SuffixPatterns.checksum()); + result = CHashing::hashCombine(result, m_ContainsPatterns.checksum()); + + return result; +} } } diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index c39339dc6b..90f6f95de9 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,7 @@ const std::string EMPTY; const model_t::CResultType SKIP_SAMPLING_RESULT_TYPE; const double SKIP_SAMPLING_WEIGHT{0.005}; +const core_t::TTime APPLIED_DETECTION_RULE_EXPIRATION{31536000}; // 1 year const CAnomalyDetectorModel::TStr1Vec EMPTY_STRING_LIST; @@ -325,6 +327,7 @@ std::uint64_t CAnomalyDetectorModel::checksum(bool /*includeCurrentBucketStats*/ hash = maths::common::CChecksum::calculate(hash, m_PersonBucketCounts[pid]); } } + seed = maths::common::CChecksum::calculate(seed, m_AppliedRuleChecksums); LOG_TRACE(<< "seed = " << seed); LOG_TRACE(<< "checksums = " << hashes); return maths::common::CChecksum::calculate(seed, hashes); @@ -336,6 +339,7 @@ void CAnomalyDetectorModel::debugMemoryUsage(const core::CMemoryUsage::TMemoryUs core::memory_debug::dynamicSize("m_Params", m_Params, mem); core::memory_debug::dynamicSize("m_PersonBucketCounts", m_PersonBucketCounts, mem); core::memory_debug::dynamicSize("m_InfluenceCalculators", m_InfluenceCalculators, mem); + core::memory_debug::dynamicSize("m_AppliedRuleChecksums", m_AppliedRuleChecksums, mem); } std::size_t CAnomalyDetectorModel::memoryUsage() const { @@ -343,6 +347,7 @@ std::size_t CAnomalyDetectorModel::memoryUsage() const { mem += core::memory::dynamicSize(m_DataGatherer); mem += core::memory::dynamicSize(m_PersonBucketCounts); mem += core::memory::dynamicSize(m_InfluenceCalculators); + mem += core::memory::dynamicSize(m_AppliedRuleChecksums); return mem; } @@ -657,5 +662,35 @@ void CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::prototypePrior( bool CMemoryCircuitBreaker::areAllocationsAllowed() const { return 
m_ResourceMonitor->areAllocationsAllowed(); } + +CAnomalyDetectorModel::TUint64TTimePrVec& CAnomalyDetectorModel::appliedRuleChecksums() { + return m_AppliedRuleChecksums; +} + +const CAnomalyDetectorModel::TUint64TTimePrVec& +CAnomalyDetectorModel::appliedRuleChecksums() const { + return m_AppliedRuleChecksums; +} + +bool CAnomalyDetectorModel::checkRuleApplied(const CDetectionRule& rule) const { + auto checksum = rule.checksum(); + return std::find_if(m_AppliedRuleChecksums.begin(), + m_AppliedRuleChecksums.end(), [checksum](const auto& pair) { + return pair.first == checksum; + }) != m_AppliedRuleChecksums.end(); +} + +void CAnomalyDetectorModel::markRuleApplied(const CDetectionRule& rule) { + auto currentTime = core::CTimeUtils::now(); + m_AppliedRuleChecksums.emplace_back(rule.checksum(), currentTime); + + // Remove all rules that are older than the expiration time + m_AppliedRuleChecksums.erase( + std::remove_if(m_AppliedRuleChecksums.begin(), m_AppliedRuleChecksums.end(), + [currentTime](const auto& pair) { + return currentTime - pair.second > APPLIED_DETECTION_RULE_EXPIRATION; + }), + m_AppliedRuleChecksums.end()); +} } } diff --git a/lib/model/CCountingModel.cc b/lib/model/CCountingModel.cc index 06fb6a34ea..dc938e9f06 100644 --- a/lib/model/CCountingModel.cc +++ b/lib/model/CCountingModel.cc @@ -36,6 +36,7 @@ namespace { const std::string WINDOW_BUCKET_COUNT_TAG("a"); const std::string PERSON_BUCKET_COUNT_TAG("b"); const std::string MEAN_COUNT_TAG("c"); +const std::string APPLIED_DETECTION_RULE_CHECKSUMS_TAG("d"); // Extra data tag deprecated at model version 34 // TODO remove on next version bump //const std::string EXTRA_DATA_TAG("d"); @@ -79,6 +80,8 @@ void CCountingModel::acceptPersistInserter(core::CStatePersistInserter& inserter core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter); core::CPersistUtils::persist(MEAN_COUNT_TAG, m_MeanCounts, inserter); + core::CPersistUtils::persist(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + this->appliedRuleChecksums(), inserter); } bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -102,6 +105,12 @@ bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traver LOG_ERROR(<< "Invalid mean counts"); return false; } + } else if (name == APPLIED_DETECTION_RULE_CHECKSUMS_TAG) { + if (core::CPersistUtils::restore(name, this->appliedRuleChecksums(), + traverser) == false) { + LOG_ERROR(<< "Invalid applied detection rule checksums"); + return false; + } } } while (traverser.next()); diff --git a/lib/model/CDetectionRule.cc b/lib/model/CDetectionRule.cc index c6f8dd3531..ec1d8687e7 100644 --- a/lib/model/CDetectionRule.cc +++ b/lib/model/CDetectionRule.cc @@ -8,12 +8,16 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. 
*/ +#include #include #include +#include + #include -#include + +#include namespace ml { namespace model { @@ -38,6 +42,10 @@ void CDetectionRule::addCondition(const CRuleCondition& condition) { m_Conditions.push_back(condition); } +void CDetectionRule::clearConditions() { + m_Conditions.clear(); +} + void CDetectionRule::setCallback(TCallback cb) { m_Callback = std::move(cb); } @@ -74,27 +82,30 @@ void CDetectionRule::executeCallback(CAnomalyDetectorModel& model, core_t::TTime return; } } + if (model.checkRuleApplied(*this)) { + return; + } m_Callback(model, time); + + // Time shift rules should be applied only once + if (m_Action & E_TimeShift) { + model.markRuleApplied(*this); + } } } void CDetectionRule::addTimeShift(core_t::TTime timeShift) { - using TAnomalyDetectorPtrVec = core::CSmallVector; - this->setCallback([ - timeShift, timeShiftApplied = TAnomalyDetectorPtrVec() - ](CAnomalyDetectorModel & model, core_t::TTime time) mutable { - if (std::find(timeShiftApplied.begin(), timeShiftApplied.end(), &model) == - timeShiftApplied.end()) { - // When the callback is executed, the model is already in the correct time - // interval. Hence, we need to shift the time right away. - // IMPLEMENTATION DECISION: We apply the negative amount of time shift to the - // model. This is because the time shift is applied to the model's frame of reference - // and not the global time. This allows a more intuitive configuration from the user's - // perspective: in spring we move the clock forward, and the time shift is positive, in - // autumn we move the clock backward, and the time shift is negative. - model.shiftTime(time, -timeShift); - timeShiftApplied.emplace_back(&model); - } + m_Action |= E_TimeShift; + m_TimeShift = timeShift; + this->setCallback([timeShift](CAnomalyDetectorModel& model, core_t::TTime time) { + // When the callback is executed, the model is already in the correct time + // interval. Hence, we need to shift the time right away. + // IMPLEMENTATION DECISION: We apply the negative amount of time shift to the + // model. This is because the time shift is applied to the model's frame of reference + // and not the global time. This allows a more intuitive configuration from the user's + // perspective: in spring we move the clock forward, and the time shift is positive, in + // autumn we move the clock backward, and the time shift is negative. + model.shiftTime(time, -timeShift); }); } @@ -134,5 +145,22 @@ std::string CDetectionRule::printAction() const { } return result; } + +std::uint64_t CDetectionRule::checksum() const { + std::uint64_t result = maths::common::CChecksum::calculate(0, m_Action); + result = maths::common::CChecksum::calculate(result, m_Scope); + result = maths::common::CChecksum::calculate(result, m_Conditions); + + // Hash callback parameters if applicable + if (m_Action & E_TimeShift) { + // Hash m_TimeShift + result = maths::common::CChecksum::calculate(result, m_TimeShift); + } + + // IMPLEMENTATION NOTE: If there are other parameters associated with the callback, + // they should be included in the checksum. 
+ + return result; +} } } diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 33d650402e..e5b8fbafc3 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -67,6 +67,7 @@ const std::string FEATURE_CORRELATE_MODELS_TAG("f"); //const std::string INTERIM_BUCKET_CORRECTOR_TAG("h"); const std::string MEMORY_ESTIMATOR_TAG("i"); const std::string UPGRADING_PRE_7_5_STATE("j"); +const std::string APPLIED_DETECTION_RULE_CHECKSUMS_TAG("k"); } CIndividualModel::CIndividualModel(const SModelParams& params, @@ -357,6 +358,8 @@ void CIndividualModel::doAcceptPersistInserter(core::CStatePersistInserter& inse } core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); inserter.insertValue(UPGRADING_PRE_7_5_STATE, false); + core::CPersistUtils::persist(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + this->appliedRuleChecksums(), inserter); } bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -387,6 +390,8 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) RESTORE_BUILT_IN(UPGRADING_PRE_7_5_STATE, upgradingPre7p5State) + RESTORE(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + core::CPersistUtils::restore(name, this->appliedRuleChecksums(), traverser)); } while (traverser.next()); if (traverser.haveBadState()) { diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc index a803815b4e..28d6358359 100644 --- a/lib/model/CPopulationModel.cc +++ b/lib/model/CPopulationModel.cc @@ -91,6 +91,8 @@ const std::string ATTRIBUTE_FIRST_BUCKET_TIME_TAG("d"); const std::string ATTRIBUTE_LAST_BUCKET_TIME_TAG("e"); const std::string PERSON_ATTRIBUTE_BUCKET_COUNT_TAG("f"); const std::string DISTINCT_PERSON_COUNT_TAG("g"); +const std::string APPLIED_DETECTION_RULE_CHECKSUMS_TAG("h"); + // Extra data tag deprecated at model version 34 // TODO remove on next version bump //const std::string EXTRA_DATA_TAG("h"); @@ -294,6 +296,8 @@ void CPopulationModel::doAcceptPersistInserter(core::CStatePersistInserter& inse std::bind(&maths::common::CBjkstUniqueValues::acceptPersistInserter, &m_DistinctPersonCounts[cid], std::placeholders::_1)); } + core::CPersistUtils::persist(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + this->appliedRuleChecksums(), inserter); } bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -316,12 +320,17 @@ bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr maths::time_series::CCountMinSketch(0, 0)); m_PersonAttributeBucketCounts.back().swap(sketch); continue; - } - if (name == DISTINCT_PERSON_COUNT_TAG) { + } else if (name == DISTINCT_PERSON_COUNT_TAG) { maths::common::CBjkstUniqueValues sketch(traverser); m_DistinctPersonCounts.push_back(maths::common::CBjkstUniqueValues(0, 0)); m_DistinctPersonCounts.back().swap(sketch); continue; + } else if (name == APPLIED_DETECTION_RULE_CHECKSUMS_TAG) { + if (core::CPersistUtils::restore(name, this->appliedRuleChecksums(), + traverser) == false) { + LOG_ERROR(<< "Invalid applied detection rule checksums"); + return false; + } } } while (traverser.next()); diff --git a/lib/model/CRuleCondition.cc b/lib/model/CRuleCondition.cc index 6f6ba6c475..8b9a35fb0e 100644 --- a/lib/model/CRuleCondition.cc +++ b/lib/model/CRuleCondition.cc @@ -12,6 +12,8 @@ #include #include +#include + #include #include @@ -164,5 +166,12 @@ std::string 
CRuleCondition::print(ERuleConditionOperator op) const { } return std::string(); } + +std::uint64_t CRuleCondition::checksum() const { + std::uint64_t result{maths::common::CChecksum::calculate(0, m_AppliesTo)}; + result = maths::common::CChecksum::calculate(result, m_Operator); + result = maths::common::CChecksum::calculate(result, m_Value); + return result; +} } } diff --git a/lib/model/CRuleScope.cc b/lib/model/CRuleScope.cc index 2cd541c5c7..e239491d7f 100644 --- a/lib/model/CRuleScope.cc +++ b/lib/model/CRuleScope.cc @@ -13,6 +13,8 @@ #include +#include + #include #include @@ -70,5 +72,15 @@ std::string CRuleScope::print() const { } return result; } + +std::uint64_t CRuleScope::checksum() const { + std::uint64_t result{0}; + for (const auto& triple : m_Scope) { + result = maths::common::CChecksum::calculate(result, triple.first); + result = maths::common::CChecksum::calculate(result, triple.second.get()); + result = maths::common::CChecksum::calculate(result, triple.third); + } + return result; +} } } diff --git a/lib/model/unittest/CCountingModelTest.cc b/lib/model/unittest/CCountingModelTest.cc index e4a3a5de65..facdbde9c1 100644 --- a/lib/model/unittest/CCountingModelTest.cc +++ b/lib/model/unittest/CCountingModelTest.cc @@ -10,6 +10,7 @@ */ #include +#include #include #include @@ -23,6 +24,7 @@ #include #include "CModelTestFixtureBase.h" +#include "core/CJsonStateRestoreTraverser.h" #include @@ -263,4 +265,44 @@ BOOST_FIXTURE_TEST_CASE(testInterimBucketCorrector, CTestFixture) { } } +BOOST_FIXTURE_TEST_CASE(testAppliedRuleChecksumsPersistRestore, CTestFixture) { + // Check that applied rule checksums are persisted and restored correctly. + + core_t::TTime time{200}; + core_t::TTime bucketLength{600}; + + SModelParams params(bucketLength); + params.s_DecayRate = 0.001; + + this->makeModel(params, {model_t::E_IndividualCountByBucketAndPerson}, time); + CCountingModel* model = dynamic_cast(m_Model.get()); + BOOST_TEST_REQUIRE(model); + + // Create a time shift detection rule and apply it + CRuleCondition conditionGte; + conditionGte.appliesTo(CRuleCondition::E_Time); + conditionGte.op(CRuleCondition::E_GTE); + conditionGte.value(100.0); + + CDetectionRule rule; + rule.addCondition(conditionGte); + rule.addTimeShift(100); + rule.executeCallback(*model, time); + + // Persist the model with CCountingModel::acceptPersistInserter + std::ostringstream persistStream; + core::CJsonStatePersistInserter inserter(persistStream); + model->acceptPersistInserter(inserter); + std::string persist = persistStream.str(); + + // Restore the model with CCountingModel::acceptRestoreTraversal + std::istringstream restoreStream(persist); + core::CJsonStateRestoreTraverser traverser(restoreStream); + auto restoredModel = std::make_shared( + params, m_Gatherer, m_InterimBucketCorrector, traverser); + + // Check that for the restored model the rule is marked as applied + BOOST_REQUIRE(model->checkRuleApplied(rule) == true); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc index ec41d0a85c..c4df2755f4 100644 --- a/lib/model/unittest/CDetectionRuleTest.cc +++ b/lib/model/unittest/CDetectionRuleTest.cc @@ -1095,4 +1095,99 @@ BOOST_FIXTURE_TEST_CASE(testTwoTimeShiftRuleShouldShiftTwice, CTestFixture) { BOOST_TEST_REQUIRE(trendModel.timeShift() == -(timeShift1 + timeShift2)); } +BOOST_FIXTURE_TEST_CASE(testChecksum, CTestFixture) { + // Create two identical rules + CDetectionRule rule1; + CDetectionRule rule2; + + // Compute checksums 
+ std::uint64_t checksum1 = rule1.checksum(); + std::uint64_t checksum2 = rule2.checksum(); + + // Verify that identical rules have the same checksum + BOOST_REQUIRE_EQUAL(checksum1, checksum2); + + // Test actions + // Modify the action of rule2 + rule2.action(CDetectionRule::E_SkipModelUpdate); + + // Verify that different actions result in different checksums + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Test conditions + // Reset rule2 to be identical to rule1 + rule2 = rule1; + + // Add a condition to rule2 + CRuleCondition condition; + condition.appliesTo(CRuleCondition::E_Actual); + condition.op(CRuleCondition::E_GT); + condition.value(100.0); + rule2.addCondition(condition); + + // Verify that adding a condition changes the checksum + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Add the same condition to rule1 + rule1.addCondition(condition); + + // Verify that identical conditions result in the same checksum + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_EQUAL(checksum1, checksum2); + + // Modify the condition in rule2 + condition.value(200.0); + rule2.clearConditions(); + rule2.addCondition(condition); + + // Verify that different condition values result in different checksums + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Test Scope + rule2 = rule1; + + // Modify the scope of rule2 + std::string fieldName = "user"; + core::CPatternSet valueFilter; + valueFilter.initFromPatternList({"admin"}); + rule2.includeScope(fieldName, valueFilter); + + // Verify that different scopes result in different checksums + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Add the same scope to rule1 + rule1.includeScope(fieldName, valueFilter); + + // Verify that identical scopes result in the same checksum + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_EQUAL(checksum1, checksum2); + + // Test Time Shift + // Modify the time shift in rule2 + rule2.addTimeShift(3600); + + // Verify that different time shifts result in different checksums + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Add the same time shift to rule1 + rule1.addTimeShift(3600); + + // Verify that identical time shifts result in the same checksum + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_EQUAL(checksum1, checksum2); +} + BOOST_AUTO_TEST_SUITE_END() From 86faea9d791e3461e3e76f7b41b9caa8da9403f5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Sat, 28 Sep 2024 08:53:26 +1200 Subject: [PATCH 10/38] [8.16][ML] QA tests on specific branch and version (#2742) (#2758) Enable being able to specify a particular ES branch and stack version when running QA tests on a PR. 
The syntax of the GitHub comment to do this is, e.g.:
```
buildkite run_qa_tests for ES_BRANCH=8.x with STACK_VERSION=8.16.0
```

Backports #2742
---
 .buildkite/ml_pipeline/config.py | 13 +++++++++++++
 .buildkite/pipelines/run_qa_tests.yml.sh | 14 +++++++++++++-
 .buildkite/pull-requests.json | 2 +-
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/.buildkite/ml_pipeline/config.py b/.buildkite/ml_pipeline/config.py
index 363d6826ef..4669ce8b58 100644
--- a/.buildkite/ml_pipeline/config.py
+++ b/.buildkite/ml_pipeline/config.py
@@ -40,6 +40,19 @@ def parse_comment(self):
 if self.run_pytorch_tests or self.run_qa_tests:
 self.action = "build"
+ # If the ACTION is set to "run_qa_tests" then set some optional variables governing the ES branch to build, the
+ # stack version to set and the subset of QA tests to run, depending on whether appropriate variables are set in
+ # the environment.
+ if self.run_qa_tests:
+ if "GITHUB_PR_COMMENT_VAR_BRANCH" in os.environ:
+ os.environ["ES_BRANCH"] = os.environ["GITHUB_PR_COMMENT_VAR_BRANCH"]
+
+ if "GITHUB_PR_COMMENT_VAR_VERSION" in os.environ:
+ os.environ["STACK_VERSION"] = os.environ["GITHUB_PR_COMMENT_VAR_VERSION"]
+
+ if "GITHUB_PR_COMMENT_VAR_ARGS" in os.environ:
+ os.environ["QAF_TESTS_TO_RUN"] = os.environ["GITHUB_PR_COMMENT_VAR_ARGS"]
+
 # If the GITHUB_PR_COMMENT_VAR_ARCH environment variable is set then attempt to parse it
 # into comma separated values. If the values are one or both of "aarch64" or "x86_64" then set the member
 # variables self.build_aarch64, self.build_x86_64 accordingly. These values will be used to restrict the build
diff --git a/.buildkite/pipelines/run_qa_tests.yml.sh b/.buildkite/pipelines/run_qa_tests.yml.sh
index 5534588ea3..5cff0aed85 100755
--- a/.buildkite/pipelines/run_qa_tests.yml.sh
+++ b/.buildkite/pipelines/run_qa_tests.yml.sh
@@ -24,5 +24,17 @@ steps:
 build:
 message: "${BUILDKITE_MESSAGE}"
 env:
- QAF_TESTS_TO_RUN: "ml_cpp_pr"
+ QAF_TESTS_TO_RUN: "${QAF_TESTS_TO_RUN:-ml_cpp_pr}"
 EOL
+
+if [ "${ES_BRANCH}" != "" ]; then
+cat <build|debug|run_qa_tests|run_pytorch_tests) *(?: *on *(?(?:[ ,]*(?:windows|linux|mac(os)?))+))?) *(?(?:[ ,]*aarch64|x86_64)+)?$",
    "trigger_comment_regex": "^(?:(?:buildkite +)(?<action>build|debug|run_qa_tests|run_pytorch_tests)(=(?<args>(?:[^ ]+)))? *(?: for ES_BRANCH=(?<branch>([.0-9a-zA-Z]+)))? *(?:with STACK_VERSION=(?<version>([.0-9]+)))? *(?: *on *(?<os>(?:[ ,]*(?:windows|linux|mac(os)?))+))?) *(?<arch>(?:[, ]*aarch64|x86_64)+)?$",
    "always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
    "skip_ci_labels": ["skip-ci", "jenkins-ci", ">test-mute", ">docs"],
    "skip_target_branches": ["6.8", "7.11", "7.12"],

From 73bd200a64a8a8dbdcee97cbf6d94b871f7eb358 Mon Sep 17 00:00:00 2001
From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com>
Date: Fri, 11 Oct 2024 09:42:04 +0200
Subject: [PATCH 11/38] Revert "[ML] Change time shift sign for intuitive
configuration (#2737) (#2743)" (#2761)

This reverts commit 0bc01ba0e12e9bc8c634d096e79f740df621bf80.
--- lib/model/CDetectionRule.cc | 7 +------ lib/model/unittest/CDetectionRuleTest.cc | 8 ++++---- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lib/model/CDetectionRule.cc b/lib/model/CDetectionRule.cc index ec1d8687e7..0a4fa6de6c 100644 --- a/lib/model/CDetectionRule.cc +++ b/lib/model/CDetectionRule.cc @@ -100,12 +100,7 @@ void CDetectionRule::addTimeShift(core_t::TTime timeShift) { this->setCallback([timeShift](CAnomalyDetectorModel& model, core_t::TTime time) { // When the callback is executed, the model is already in the correct time // interval. Hence, we need to shift the time right away. - // IMPLEMENTATION DECISION: We apply the negative amount of time shift to the - // model. This is because the time shift is applied to the model's frame of reference - // and not the global time. This allows a more intuitive configuration from the user's - // perspective: in spring we move the clock forward, and the time shift is positive, in - // autumn we move the clock backward, and the time shift is negative. - model.shiftTime(time, -timeShift); + model.shiftTime(time, timeShift); }); } diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc index c4df2755f4..271ce6849e 100644 --- a/lib/model/unittest/CDetectionRuleTest.cc +++ b/lib/model/unittest/CDetectionRuleTest.cc @@ -1020,8 +1020,8 @@ BOOST_FIXTURE_TEST_CASE(testRuleTimeShiftShouldShiftTimeSeriesModelState, CTestF rule.executeCallback(*model, timestamp); // the time series model should have been shifted by specified amount. - BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTime - timeShiftInSecs); - BOOST_TEST_REQUIRE(trendModel.timeShift() == -timeShiftInSecs); + BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTime + timeShiftInSecs); + BOOST_TEST_REQUIRE(trendModel.timeShift() == timeShiftInSecs); // and an annotation should have been added to the model BOOST_TEST_REQUIRE(annotations.size() == numAnnotationsBeforeShift + 1); @@ -1091,8 +1091,8 @@ BOOST_FIXTURE_TEST_CASE(testTwoTimeShiftRuleShouldShiftTwice, CTestFixture) { // the values after the second time should be the sum of two rules. timestamp += timeShift1; // simulate the time has moved forward by the time shift rule2.executeCallback(*model, timestamp); - BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTimeAfterFirstShift - timeShift2); - BOOST_TEST_REQUIRE(trendModel.timeShift() == -(timeShift1 + timeShift2)); + BOOST_TEST_REQUIRE(trendModel.lastValueTime() == lastValueTimeAfterFirstShift + timeShift2); + BOOST_TEST_REQUIRE(trendModel.timeShift() == timeShift1 + timeShift2); } BOOST_FIXTURE_TEST_CASE(testChecksum, CTestFixture) { From e8ba5ef98f9e7b0db450fae145becd14c6b97cae Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:34:44 +0200 Subject: [PATCH 12/38] [ML] Fix forecasting parameters initialization for large models (#2759) (#2762) We fix initialization of the decayRate parameter when models have to spill to disk --- docs/CHANGELOG.asciidoc | 6 ++++++ lib/model/CForecastModelPersist.cc | 6 +----- lib/model/unittest/CForecastModelPersistTest.cc | 6 ++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 9c1d528166..32af915f1e 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -39,6 +39,12 @@ * Allow for pytorch_inference results to include zero-dimensional tensors. 
+== {es} version 8.15.4 + +=== Bug Fixes + +* Fix parameter initialization for large forecasting models. (See {ml-pull}2759[#2759].) + == {es} version 8.15.2 === Enhancements diff --git a/lib/model/CForecastModelPersist.cc b/lib/model/CForecastModelPersist.cc index 0f59a09a50..51d353c6ff 100644 --- a/lib/model/CForecastModelPersist.cc +++ b/lib/model/CForecastModelPersist.cc @@ -128,11 +128,7 @@ bool CForecastModelPersist::CRestore::nextModel(TMathsModelPtr& model, m_ModelParams.s_MaximumTimeToTestForChange}; maths::common::SModelRestoreParams params{ - modelParams, - maths::common::STimeSeriesDecompositionRestoreParams{ - m_ModelParams.s_DecayRate, m_ModelParams.s_BucketLength, - m_ModelParams.s_ComponentSize, - m_ModelParams.distributionRestoreParams(dataType)}, + modelParams, m_ModelParams.decompositionRestoreParams(dataType), m_ModelParams.distributionRestoreParams(dataType)}; auto serialiserOperator = diff --git a/lib/model/unittest/CForecastModelPersistTest.cc b/lib/model/unittest/CForecastModelPersistTest.cc index b932b1423f..d0aa03af22 100644 --- a/lib/model/unittest/CForecastModelPersistTest.cc +++ b/lib/model/unittest/CForecastModelPersistTest.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -39,8 +40,9 @@ BOOST_AUTO_TEST_CASE(testPersistAndRestore) { params.s_DecayRate = 0.001; params.s_LearnRate = 1.0; params.s_MinimumTimeToDetectChange = 6 * core::constants::HOUR; - params.s_MaximumTimeToTestForChange = core::constants::DAY; - maths::time_series::CTimeSeriesDecomposition trend(params.s_DecayRate, bucketLength); + double trendDecayRate{CAnomalyDetectorModelConfig::trendDecayRate( + params.s_DecayRate, bucketLength)}; + maths::time_series::CTimeSeriesDecomposition trend(trendDecayRate, bucketLength); maths::common::CNormalMeanPrecConjugate prior{ maths::common::CNormalMeanPrecConjugate::nonInformativePrior( From 148f2c42652bddc38224ef8bf50992647d38ca9a Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:10:15 +0200 Subject: [PATCH 13/38] [ML] Preparing for 8.16 feature freeze (#2766) --- catalog-info.yaml | 14 +++++++++----- gradle.properties | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/catalog-info.yaml b/catalog-info.yaml index cb3098d8a4..195e681d24 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -156,7 +156,7 @@ spec: build_branches: true build_pull_request_forks: false cancel_deleted_branch_builds: true - filter_condition: build.branch == "main" || build.branch == "8.15" || build.branch == "7.17" + filter_condition: build.branch == "main" || build.branch == "8.x" || build.branch == "8.16" || build.branch == "7.17" filter_enabled: true publish_blocked_as_pending: true publish_commit_status: false @@ -166,12 +166,16 @@ spec: schedules: Daily 7_17: branch: '7.17' - cronline: 30 02 * * * + cronline: 30 03 * * * message: Daily SNAPSHOT build for 7.17 - Daily 8_15: - branch: '8.15' + Daily 8_16: + branch: '8.16' + cronline: 30 02 * * * + message: Daily SNAPSHOT build for 8.16 + Daily 8_x: + branch: '8.x' cronline: 30 01 * * * - message: Daily SNAPSHOT build for 8.15 + message: Daily SNAPSHOT build for 8.x Daily main: branch: main cronline: 30 00 * * * diff --git a/gradle.properties b/gradle.properties index cf665d57c6..a72fdabb1b 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,6 @@ org.gradle.daemon=false -elasticsearchVersion=8.16.0 +elasticsearchVersion=8.17.0 artifactName=ml-cpp From 28d45036ddf9c4e4ddfbb9c41a7220e60e03630f Mon Sep 17 
00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:47:12 +0100 Subject: [PATCH 14/38] [ML] Bump version to 8.18.0 (#2781) Update the Elasticsearch version to 8.18.0 --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index a72fdabb1b..6783aa7ca2 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,6 @@ org.gradle.daemon=false -elasticsearchVersion=8.17.0 +elasticsearchVersion=8.18.0 artifactName=ml-cpp From 1be8e50850531a52706950a6b583594c7783dd7f Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Dec 2024 21:48:02 +1300 Subject: [PATCH 15/38] [8.18][ML] Upgrade to PyTorch 2.5.0 on Windows 2016 builds (#2783) (#2790) Update build scripts and docs to refer to PyTorch 2.5.0 Note the slightly different procedure between the Windows 2022 and 2016 builds. Backports #2783 --- 3rd_party/licenses/pytorch-INFO.csv | 2 +- build-setup/windows.md | 6 +++--- dev-tools/download_windows_deps.ps1 | 6 +++--- docs/CHANGELOG.asciidoc | 6 ++++++ 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/3rd_party/licenses/pytorch-INFO.csv b/3rd_party/licenses/pytorch-INFO.csv index 40806db5f4..aab5a87478 100644 --- a/3rd_party/licenses/pytorch-INFO.csv +++ b/3rd_party/licenses/pytorch-INFO.csv @@ -1,2 +1,2 @@ name,version,revision,url,license,copyright,sourceURL -PyTorch,2.3.1,63d5e9221bedd1546b7d364b5ce4171547db12a9,https://pytorch.org,BSD-3-Clause,, +PyTorch,2.5.0,32f585d9346e316e554c8d9bf7548af9f62141fc,https://pytorch.org,BSD-3-Clause,, diff --git a/build-setup/windows.md b/build-setup/windows.md index 15ff2b6b92..b3a00d0677 100644 --- a/build-setup/windows.md +++ b/build-setup/windows.md @@ -193,7 +193,7 @@ On the "Advanced Options" screen, check "Install for all users" and "Add Python For the time being, do not take advantage of the option on the final installer screen to reconfigure the machine to allow paths longer than 260 characters. We still support Windows versions that do not have this option. -### PyTorch 2.3.1 +### PyTorch 2.5.0 (This step requires a lot of memory. It failed on a machine with 12GB of RAM. It just about fitted on a 20GB machine. 32GB RAM is recommended.) @@ -209,7 +209,7 @@ Next, in a Git bash shell run: ``` cd /c/tools -git clone --depth=1 --branch=v2.3.1 https://github.com/pytorch/pytorch.git +git clone --depth=1 --branch=v2.5.0 https://github.com/pytorch/pytorch.git cd pytorch git submodule sync git submodule update --init --recursive @@ -265,7 +265,7 @@ set USE_QNNPACK=OFF set USE_PYTORCH_QNNPACK=OFF set USE_XNNPACK=OFF set MSVC_Z7_OVERRIDE=OFF -set PYTORCH_BUILD_VERSION=2.3.1 +set PYTORCH_BUILD_VERSION=2.5.0 set PYTORCH_BUILD_NUMBER=1 python setup.py install ``` diff --git a/dev-tools/download_windows_deps.ps1 b/dev-tools/download_windows_deps.ps1 index ac04854f19..9a303a484c 100755 --- a/dev-tools/download_windows_deps.ps1 +++ b/dev-tools/download_windows_deps.ps1 @@ -9,11 +9,11 @@ # limitation. 
# $ErrorActionPreference="Stop" -$Archive="usr-x86_64-windows-2016-13.zip" +$Archive="usr-x86_64-windows-2016-14.zip" $Destination="C:\" -# If PyTorch is not version 2.3.1 then we need the latest download +# If PyTorch is not version 2.5.0 then we need the latest download if (!(Test-Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h") -Or - !(Select-String -Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h" -Pattern "2.3.1" -Quiet)) { + !(Select-String -Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h" -Pattern "2.5.0" -Quiet)) { Remove-Item "$Destination\usr" -Recurse -Force -ErrorAction Ignore $ZipSource="https://storage.googleapis.com/elastic-ml-public/dependencies/$Archive" $ZipDestination="$Env:TEMP\$Archive" diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 32af915f1e..cd190b04f8 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,12 @@ //=== Regressions +== {es} version 8.18.0 + +=== Enhancements + +* Update the PyTorch library to version 2.5.0. (See {ml-pull}2783[#2783].) + == {es} version 8.16.0 === Enhancements From 73b518680815b1692d9e60812b8cae8a3ba27029 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 5 Dec 2024 17:40:25 +1300 Subject: [PATCH 16/38] [8.18][ML] Windows 2016 boost 1.86 (#2780) (#2791) Update Windows build scripts, documentation etc. to refer to Boost 1.86. Backports #2780 Depends on #2783 --- 3rd_party/3rd_party.cmake | 2 +- 3rd_party/licenses/boost-INFO.csv | 2 +- build-setup/windows.md | 14 +++++++------- cmake/variables.cmake | 7 ++++++- docs/CHANGELOG.asciidoc | 1 + lib/core/unittest/CMemoryUsageTest.cc | 12 ++++++------ lib/model/CBucketGatherer.cc | 2 ++ 7 files changed, 24 insertions(+), 16 deletions(-) diff --git a/3rd_party/3rd_party.cmake b/3rd_party/3rd_party.cmake index dd0ee72928..59b9ae2cb7 100644 --- a/3rd_party/3rd_party.cmake +++ b/3rd_party/3rd_party.cmake @@ -142,7 +142,7 @@ else() # server is currently set up set(BOOST_LOCATION "${LOCAL_DRIVE}/usr/local/lib") set(BOOST_COMPILER "vc") - set(BOOST_EXTENSION "mt-x64-1_83.dll") + set(BOOST_EXTENSION "mt-x64-1_86.dll") set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION "${LOCAL_DRIVE}/usr/local/bin") set(XML_EXTENSION ".dll") diff --git a/3rd_party/licenses/boost-INFO.csv b/3rd_party/licenses/boost-INFO.csv index c30b721cb4..9a9a86db27 100644 --- a/3rd_party/licenses/boost-INFO.csv +++ b/3rd_party/licenses/boost-INFO.csv @@ -1,2 +1,2 @@ name,version,revision,url,license,copyright,sourceURL -Boost C++ Libraries,1.83.0,,http://www.boost.org,BSL-1.0,, +Boost C++ Libraries,1.86.0,,http://www.boost.org,BSL-1.0,, diff --git a/build-setup/windows.md b/build-setup/windows.md index b3a00d0677..1e27855b19 100644 --- a/build-setup/windows.md +++ b/build-setup/windows.md @@ -119,33 +119,33 @@ nmake nmake install ``` -### Boost 1.83.0 +### Boost 1.86.0 -Download version 1.83.0 of Boost from . You must get this exact version, as the Machine Learning build system requires it. +Download version 1.86.0 of Boost from . You must get this exact version, as the Machine Learning build system requires it. 
Assuming you chose the `.bz2` version, extract it in a Git bash shell using the GNU tar that comes with Git for Windows, e.g.: ``` cd /c/tools -tar jxvf /z/cpp_src/boost_1_83_0.tar.bz2 +tar jxvf /z/cpp_src/boost_1_86_0.tar.bz2 ``` -Edit `boost/unordered/detail/prime_fmod.hpp` and change line 134 from: +Edit `boost/unordered/detail/prime_fmod.hpp` and change line 37 from: ``` - (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ + constexpr static std::size_t const sizes[] = {13ul, 29ul, 53ul, 97ul, ``` to: ``` - (3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ + constexpr static std::size_t const sizes[] = {3ul, 13ul, 29ul, 53ul, 97ul, ``` Start a command prompt using Start Menu -> Apps -> Visual Studio 2019 -> x64 Native Tools Command Prompt for VS 2019, then in it type: ``` -cd \tools\boost_1_83_0 +cd \tools\boost_1_86_0 bootstrap.bat b2 -j6 --layout=versioned --disable-icu --toolset=msvc-14.2 cxxflags="-std:c++17" linkflags="-std:c++17" --build-type=complete -sZLIB_INCLUDE="C:\tools\zlib-1.2.13" -sZLIB_LIBPATH="C:\tools\zlib-1.2.13" -sZLIB_NAME=zdll --without-context --without-coroutine --without-graph_parallel --without-mpi --without-python architecture=x86 address-model=64 optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_WIN32_WINNT=0x0601 b2 install --prefix=C:\usr\local --layout=versioned --disable-icu --toolset=msvc-14.2 cxxflags="-std:c++17" linkflags="-std:c++17" --build-type=complete -sZLIB_INCLUDE="C:\tools\zlib-1.2.13" -sZLIB_LIBPATH="C:\tools\zlib-1.2.13" -sZLIB_NAME=zdll --without-context --without-coroutine --without-graph_parallel --without-mpi --without-python architecture=x86 address-model=64 optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_WIN32_WINNT=0x0601 diff --git a/cmake/variables.cmake b/cmake/variables.cmake index a1f557dd67..140284636f 100644 --- a/cmake/variables.cmake +++ b/cmake/variables.cmake @@ -243,7 +243,12 @@ set(Boost_USE_STATIC_LIBS OFF) set(Boost_USE_DEBUG_RUNTIME OFF) set(Boost_COMPILER "${ML_BOOST_COMPILER_VER}") -find_package(Boost 1.83.0 EXACT REQUIRED COMPONENTS iostreams filesystem program_options regex date_time log log_setup thread unit_test_framework) +set(Boost_VERSION 1.83.0) +if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + message(WARNING "Using Boost 1.86.0 on Windows only.") + set(Boost_VERSION 1.86.0) +endif() +find_package(Boost ${Boost_VERSION} EXACT REQUIRED COMPONENTS iostreams filesystem program_options regex date_time log log_setup thread unit_test_framework) if(Boost_FOUND) list(APPEND ML_SYSTEM_INCLUDE_DIRECTORIES ${Boost_INCLUDE_DIRS}) endif() diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index cd190b04f8..bcfcd627dc 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Update the PyTorch library to version 2.5.0. (See {ml-pull}2783[#2783].) +* Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780].) 
== {es} version 8.16.0 diff --git a/lib/core/unittest/CMemoryUsageTest.cc b/lib/core/unittest/CMemoryUsageTest.cc index 91fd00c76c..de10581269 100644 --- a/lib/core/unittest/CMemoryUsageTest.cc +++ b/lib/core/unittest/CMemoryUsageTest.cc @@ -1287,12 +1287,12 @@ BOOST_AUTO_TEST_CASE(testSmallVector) { BOOST_REQUIRE_EQUAL(0, extraMem); growShrink.push_back(1.7); extraMem = core::memory::dynamicSize(growShrink); - // Interesting (shocking?) result: once a boost::small_vector has switched - // off of internal storage it will NEVER go back to internal storage. - // Arguably this is a bug, and this assertion might start failing after a - // Boost upgrade. If that happens and changing it to assert extraMem is 0 - // fixes it then this means boost::small_vector has been improved. - BOOST_TEST_REQUIRE(extraMem > 0); + // Interestingly we used to assert extraMem > 0 here as it used to be the case + // that once a boost::small_vector had switched + // off of internal storage it would NEVER go back to internal storage. + // Arguably that was a bug, and this assertion started failing after + // upgrading Boost to 1.86.0, meaning that boost::small_vector has been improved. + BOOST_TEST_REQUIRE(extraMem >= 0); // Change this to "==" when all platforms have been upgraded to Boost 1.86.0 } BOOST_AUTO_TEST_CASE(testAlignedVector) { diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc index a4f85e23c4..7a987ec6ae 100644 --- a/lib/model/CBucketGatherer.cc +++ b/lib/model/CBucketGatherer.cc @@ -24,6 +24,8 @@ #include +#include + #include #include From f4a77b94868ecf20a55f2bfcf0fb5c879b01e7b2 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 9 Dec 2024 10:52:24 +1300 Subject: [PATCH 17/38] [8.18][ML] Upgrade to PyTorch 2.5 and Boost 1.86 for macOS ARM builds (#2794) Upgrade build scripts, docs etc. to refer to PyTorch 2.5 and Boost 1.86.0 for macOS ARM builds. Backports #2778 and #2779 --- 3rd_party/3rd_party.cmake | 2 +- build-setup/macos.md | 57 +++++++++++++++++++++------ cmake/variables.cmake | 9 ++++- dev-tools/download_macos_deps.sh | 2 +- docs/CHANGELOG.asciidoc | 4 +- lib/core/unittest/CMemoryUsageTest.cc | 2 +- 6 files changed, 56 insertions(+), 20 deletions(-) diff --git a/3rd_party/3rd_party.cmake b/3rd_party/3rd_party.cmake index 59b9ae2cb7..b8154578cd 100644 --- a/3rd_party/3rd_party.cmake +++ b/3rd_party/3rd_party.cmake @@ -49,7 +49,7 @@ if ("${HOST_SYSTEM_NAME}" STREQUAL "darwin") else() set(BOOST_ARCH "a64") endif() - set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_83.dylib") + set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_86.dylib") set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION) set(GCC_RT_LOCATION) diff --git a/build-setup/macos.md b/build-setup/macos.md index 9d3719384f..bbd04d488d 100644 --- a/build-setup/macos.md +++ b/build-setup/macos.md @@ -66,17 +66,17 @@ xcode-select --install at the command prompt. -### Boost 1.83.0 +### Boost 1.86.0 -Download version 1.83.0 of Boost from . You must get this exact version, as the Machine Learning build system requires it. +Download version 1.86.0 of Boost from . You must get this exact version, as the Machine Learning build system requires it. 
Assuming you chose the `.bz2` version, extract it to a temporary directory: ``` -bzip2 -cd boost_1_83_0.tar.bz2 | tar xvf - +bzip2 -cd boost_1_86_0.tar.bz2 | tar xvf - ``` -In the resulting `boost_1_83_0` directory, run: +In the resulting `boost_1_86_0` directory, run: ``` ./bootstrap.sh --with-toolset=clang --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu @@ -84,16 +84,16 @@ In the resulting `boost_1_83_0` directory, run: This should build the `b2` program, which in turn is used to build Boost. -Edit `boost/unordered/detail/prime_fmod.hpp` and change line 134 from +Edit `boost/unordered/detail/prime_fmod.hpp` and change line 37 from ``` - (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ + constexpr static std::size_t const sizes[] = {13ul, 29ul, 53ul, 97ul, ``` to: ``` - (3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ + constexpr static std::size_t const sizes[] = {3ul, 13ul, 29ul, 53ul, 97ul, ``` @@ -101,7 +101,7 @@ To complete the build, type: ``` ./b2 -j8 --layout=versioned --disable-icu cxxflags="-std=c++17 -stdlib=libc++ $SSEFLAGS" linkflags="-std=c++17 -stdlib=libc++ -Wl,-headerpad_max_install_names" optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC -sudo ./b2 install --layout=versioned --disable-icu cxxflags="-std=c++17 -stdlib=libc++ $SSEFLAGS" linkflags="-std=c++17 -stdlib=libc++ -Wl,-headerpad_max_install_names" optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC +sudo ./b2 -j8 install --layout=versioned --disable-icu cxxflags="-std=c++17 -stdlib=libc++ $SSEFLAGS" linkflags="-std=c++17 -stdlib=libc++ -Wl,-headerpad_max_install_names" optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC ``` to install the Boost headers and libraries. @@ -127,7 +127,7 @@ Download the graphical installer for Python 3.10.9 from = 0); // Change this to "==" when all platforms have been upgraded to Boost 1.86.0 + BOOST_TEST_REQUIRE(extraMem >= 0); // Change to `==` once upgraded to Boost 1.86 on all platforms } BOOST_AUTO_TEST_CASE(testAlignedVector) { From 5e48d37c188277f77203b0d73e8adf53368d6025 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 10 Dec 2024 09:42:43 +1300 Subject: [PATCH 18/38] [8.18][ML] Drop support for macOS Intel builds (#2795) From version 8.18.0 we drop support for macos x86_64 builds. Therefore all support for cross compiling for macos x86_64 has also been dropped. 
Relates elastic/elasticsearch#104125 --- .buildkite/branch.json.py | 2 +- .buildkite/job-build-test-all-debug.json.py | 2 +- .buildkite/pipeline.json.py | 2 +- .buildkite/pipelines/build_macos.json.py | 22 +---- .buildkite/pipelines/create_dra.yml.sh | 1 - .buildkite/scripts/steps/build_and_test.sh | 22 +---- .ci/orka/README.md | 10 +-- build-setup/macos.md | 1 + build-setup/macos_cross_compiled.md | 93 -------------------- cmake/compiler/clang.cmake | 35 ++------ dev-tools/docker/README.md | 21 ----- dev-tools/docker/build_macosx_build_image.sh | 40 --------- dev-tools/docker/macosx_builder/Dockerfile | 37 -------- dev-tools/docker/macosx_image/Dockerfile | 59 ------------- dev-tools/docker_build.sh | 7 +- dev-tools/strip_binaries.sh | 26 +----- docs/CHANGELOG.asciidoc | 1 + 17 files changed, 19 insertions(+), 362 deletions(-) delete mode 100644 build-setup/macos_cross_compiled.md delete mode 100755 dev-tools/docker/build_macosx_build_image.sh delete mode 100644 dev-tools/docker/macosx_builder/Dockerfile delete mode 100644 dev-tools/docker/macosx_image/Dockerfile diff --git a/.buildkite/branch.json.py b/.buildkite/branch.json.py index 3c279e3a93..374326ddd5 100755 --- a/.buildkite/branch.json.py +++ b/.buildkite/branch.json.py @@ -34,7 +34,7 @@ def main(): build_windows = pipeline_steps.generate_step_template("Windows", "build", "", config.build_x86_64) pipeline_steps.append(build_windows) if config.build_macos: - build_macos = pipeline_steps.generate_step_template("MacOS", "build", config.build_aarch64, config.build_x86_64) + build_macos = pipeline_steps.generate_step_template("MacOS", "build", config.build_aarch64, "") pipeline_steps.append(build_macos) if config.build_linux: build_linux = pipeline_steps.generate_step_template("Linux", "build", config.build_aarch64, config.build_x86_64) diff --git a/.buildkite/job-build-test-all-debug.json.py b/.buildkite/job-build-test-all-debug.json.py index 1ce77a61ae..e02e0908bf 100755 --- a/.buildkite/job-build-test-all-debug.json.py +++ b/.buildkite/job-build-test-all-debug.json.py @@ -44,7 +44,7 @@ def main(): debug_windows = pipeline_steps.generate_step_template("Windows", "debug", "", config.build_x86_64) pipeline_steps.append(debug_windows) if config.build_macos: - debug_macos = pipeline_steps.generate_step_template("MacOS", "debug", config.build_aarch64, config.build_x86_64) + debug_macos = pipeline_steps.generate_step_template("MacOS", "debug", config.build_aarch64, "") pipeline_steps.append(debug_macos) if config.build_linux: debug_linux = pipeline_steps.generate_step_template("Linux", "debug", config.build_aarch64, config.build_x86_64) diff --git a/.buildkite/pipeline.json.py b/.buildkite/pipeline.json.py index 1840d5dc9c..a466636ec9 100755 --- a/.buildkite/pipeline.json.py +++ b/.buildkite/pipeline.json.py @@ -43,7 +43,7 @@ def main(): build_windows = pipeline_steps.generate_step_template("Windows", config.action, "", config.build_x86_64) pipeline_steps.append(build_windows) if config.build_macos: - build_macos = pipeline_steps.generate_step_template("MacOS", config.action, config.build_aarch64, config.build_x86_64) + build_macos = pipeline_steps.generate_step_template("MacOS", config.action, config.build_aarch64, "") pipeline_steps.append(build_macos) if config.build_linux: build_linux = pipeline_steps.generate_step_template("Linux", config.action, config.build_aarch64, config.build_x86_64) diff --git a/.buildkite/pipelines/build_macos.json.py b/.buildkite/pipelines/build_macos.json.py index 82d35cdb5c..7d20e24caf 100755 --- 
a/.buildkite/pipelines/build_macos.json.py +++ b/.buildkite/pipelines/build_macos.json.py @@ -21,8 +21,7 @@ from itertools import product archs = [ - "aarch64", - "x86_64", + "aarch64" ] build_types = [ "RelWithDebInfo", @@ -32,10 +31,6 @@ "debug" ] agents = { - "x86_64": { - "provider": "orka", - "image": "ml-macos-12-x86_64-001.img" - }, "aarch64": { "provider": "orka", "image": "ml-macos-12-arm-001.orkasi" @@ -51,16 +46,6 @@ "CMAKE_FLAGS": "-DCMAKE_TOOLCHAIN_FILE=cmake/darwin-aarch64.cmake", "RUN_TESTS": "true", "BOOST_TEST_OUTPUT_FORMAT_FLAGS": "--logger=JUNIT,error,boost_test_results.junit", - }, - "x86_64": { - "TMPDIR": "/tmp", - "HOMEBREW_PREFIX": "/opt/homebrew", - "PATH": "/opt/homebrew/bin:$PATH", - "ML_DEBUG": "0", - "CPP_CROSS_COMPILE": "", - "CMAKE_FLAGS": "-DCMAKE_TOOLCHAIN_FILE=cmake/darwin-x86_64.cmake", - "RUN_TESTS": "true", - "BOOST_TEST_OUTPUT_FORMAT_FLAGS": "--logger=JUNIT,error,boost_test_results.junit", } } @@ -122,11 +107,6 @@ def main(args): action='https://github.com/store_true', default=False, help="Build for aarch64?.") - parser.add_argument("--build-x86_64", - required=False, - action='https://github.com/store_true', - default=False, - help="Build for x86_64?") args = parser.parse_args() diff --git a/.buildkite/pipelines/create_dra.yml.sh b/.buildkite/pipelines/create_dra.yml.sh index 6a4b87f547..5873caf3f1 100755 --- a/.buildkite/pipelines/create_dra.yml.sh +++ b/.buildkite/pipelines/create_dra.yml.sh @@ -17,7 +17,6 @@ steps: depends_on: - "build_test_linux-aarch64-RelWithDebInfo" - "build_test_linux-x86_64-RelWithDebInfo" - - "build_test_macos-x86_64-RelWithDebInfo" - "build_test_macos-aarch64-RelWithDebInfo" - "build_test_Windows-x86_64-RelWithDebInfo" diff --git a/.buildkite/scripts/steps/build_and_test.sh b/.buildkite/scripts/steps/build_and_test.sh index af1c912052..b35aaa9ed8 100755 --- a/.buildkite/scripts/steps/build_and_test.sh +++ b/.buildkite/scripts/steps/build_and_test.sh @@ -90,29 +90,11 @@ else # Darwin (macOS) else TASKS="clean buildZip buildZipSymbols check" fi - # For macOS we usually only use a particular version as our build platform - # once Xcode has stopped receiving updates for it. However, with Big Sur - # on ARM we couldn't do this, as Big Sur was the first macOS version for - # ARM. Therefore, the compiler may get upgraded on a CI server, and we - # need to hardcode the version that was used to build Boost for that - # version of Elasticsearch. - if [ "$HARDWARE_ARCH" = aarch64 ] ; then - export BOOSTCLANGVER=13 - fi (cd ${REPO_ROOT} && ./gradlew --info -Dbuild.version_qualifier=${VERSION_QUALIFIER:-} -Dbuild.snapshot=$BUILD_SNAPSHOT -Dbuild.ml_debug=$ML_DEBUG $TASKS) || TEST_OUTCOME=$? else # Darwin x86_64 - # For macOS x86_64 we re-use existing Docker scripts and build directly on the machine - function nproc() { - sysctl -n hw.logicalcpu - } - export -f nproc - if [ "$RUN_TESTS" = "true" ]; then - ${REPO_ROOT}/dev-tools/docker/docker_entrypoint.sh --test - grep passed build/test_status.txt || TEST_OUTCOME=$? 
- else - ${REPO_ROOT}/dev-tools/docker/docker_entrypoint.sh - fi + echo "Unsupported architecture - macos x86_64" + exit 1 fi fi diff --git a/.ci/orka/README.md b/.ci/orka/README.md index 91978c43c5..7d55be17c6 100644 --- a/.ci/orka/README.md +++ b/.ci/orka/README.md @@ -19,10 +19,6 @@ If you haven't run these before, run the following once so packer downloads the ``` packer init orka-macos-12-arm.pkr.hcl ``` -or -``` -packer init orka-macos-12-x86_64.pkr.hcl -``` ## Build @@ -46,7 +42,6 @@ The source images used for the MacOS builds are slightly modified copies of the The source images are named: * `ml-macos-12-base-arm-fundamental.orkasi` - * `ml-macos-12-base-x86_64-fundamental.img` The source image only has the following changes on it: * Adding passwordless `sudo` for the default `admin` user @@ -70,7 +65,4 @@ The packer script does the following: ## Caveats -* Prior to the dependency on PyTorch 2.3.1 we only needed Orka for ARM builds (CI and dependencies), x86_64 builds were - performed via cross-compilation. However, PyTorch 2.3.1 now requires a more modern version of `clang` that our cross - compilation framework provided. As a suitable Orka base image is available for x86_64, it is now simpler to compile - natively for that architecture. +* As of version 8.18 support for macos x86_64 builds has been dropped. It is necessary to checkout and work on previous branches in order to maintain x86_64 Orka VMs. diff --git a/build-setup/macos.md b/build-setup/macos.md index bbd04d488d..4609c073ff 100644 --- a/build-setup/macos.md +++ b/build-setup/macos.md @@ -260,6 +260,7 @@ export PYTORCH_BUILD_NUMBER=1 Once built copy headers and libraries to system directories: ``` +sudo mkdir -p /usr/local/lib sudo mkdir -p /usr/local/include/pytorch sudo cp -r torch/include/* /usr/local/include/pytorch/ sudo cp torch/lib/libtorch_cpu.dylib /usr/local/lib/ diff --git a/build-setup/macos_cross_compiled.md b/build-setup/macos_cross_compiled.md deleted file mode 100644 index b6305b93ba..0000000000 --- a/build-setup/macos_cross_compiled.md +++ /dev/null @@ -1,93 +0,0 @@ -# Machine Learning Build Machine Setup for macOS cross compiled on Linux - -You will need the following environment variables to be defined: - -- `JAVA_HOME` - Should point to the JDK you want to use to run Gradle. -- `CPP_CROSS_COMPILE` - Should be set to "macosx". -- `CPP_SRC_HOME` - Only required if building the C++ code directly using `cmake`, as Gradle sets it automatically. - -For example, you might create a .bashrc file in your home directory containing this: - -``` -umask 0002 -export JAVA_HOME=/usr/local/jdk1.8.0_121 -export PATH=$JAVA_HOME/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin -# Only required if building the C++ code directly using cmake - adjust depending on the location of your Git clone -export CPP_SRC_HOME=$HOME/ml-cpp -export CPP_CROSS_COMPILE=macosx -``` - -### Initial Preparation - -Start by configuring a native macOS build server as described in [macos.md](macos.md). - -The remainder of these instructions assume the macOS build server you have configured is for macOS 10.14 (Mojave). This is what builds for distribution are currently built on. 
- -On the fully configured macOS build server, run the following commands: - -``` -cd /usr -tar jcvf ~/usr-x86_64-apple-macosx10.14.tar.bz2 lib local -cd /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr -tar jcvf ~/xcode-x86_64-apple-macosx10.14.tar.bz2 include -cd `xcrun --show-sdk-path`/usr -tar jcvf ~/sdk-x86_64-apple-macosx10.14.tar.bz2 include lib -``` - -These instructions also assume the host platform is Ubuntu 18.04. It makes life much easier if the host platform is a version of Ubuntu that's new enough to run the official binary distribution of clang/LLVM (otherwise it would be necessary to build clang/LLVM from source). - -Transfer the three archives created in your home directory on the macOS build server, `usr-x86_64-apple-macosx10.14.tar.bz2`, `xcode-x86_64-apple-macosx10.14.tar.bz2` and `sdk-x86_64-apple-macosx10.14.tar.bz2`, to your home directory on the cross compilation host build server. - -### OS Packages - -You need clang 8, plus a number of other build tools. They can be installed on modern Ubuntu as follows: - -``` -sudo apt-get install automake autogen build-essential bzip2 git gobjc libtool software-properties-common unzip wget - -wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - -sudo apt-add-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal main" -sudo apt-get install clang-8 clang-8-doc libclang1-8 libllvm8 lldb-8 llvm-8 llvm-8-doc llvm-8-runtime -``` - -(It is strongly recommended NOT to attempt to create a cross compile environment on an old version of Linux, because you will have to build clang from source and you need a modern C++ compiler to build clang. So you would probably end up first building a modern version of gcc using the system default gcc, then building clang using the modern gcc.) - -### Transferred Build Dependencies - -Add the dependencies that you copied from the fully configured macOS build server in the "Initial Preparation" step. - -``` -sudo mkdir -p /usr/local/sysroot-x86_64-apple-macosx10.14/usr -cd /usr/local/sysroot-x86_64-apple-macosx10.14/usr -sudo tar jxvf ~/usr-x86_64-apple-macosx10.14.tar.bz2 -sudo tar jxvf ~/xcode-x86_64-apple-macosx10.14.tar.bz2 -sudo tar jxvf ~/sdk-x86_64-apple-macosx10.14.tar.bz2 -``` - -### CMake - -CMake version 3.19.2 is the minimum required to build ml-cpp. Download version 3.23.2 from and install: - -``` -chmod +x cmake-3.23.2-Linux-x86_64.sh -sudo ./cmake-3.23.2-Linux-x86_64.sh --skip-license --prefix=/usr/local -``` - -### cctools-port - -You need to obtain Linux ports of several Apple development tools. The easiest way to get them is to use the [cctools-port project on GitHub](https://github.com/tpoechtrager/cctools-port): - -``` -git clone https://github.com/tpoechtrager/cctools-port.git -cd cctools-port/cctools -git checkout 949.0.1-ld64-530 -export CC=clang-8 -export CXX=clang++-8 -./autogen.sh -./configure --target=x86_64-apple-macosx10.14 --with-llvm-config=/usr/bin/llvm-config-8 -make -sudo make install -``` - -The "949.0.1-ld64-530" branch in the [cctools-port repository](https://github.com/tpoechtrager/cctools-port) corresponds to the tools for macOS 10.14 Mojave and clang 8. (A different branch would be required for newer versions of the OS/compiler.) - diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake index bd9ff030db..1749ad0a89 100644 --- a/cmake/compiler/clang.cmake +++ b/cmake/compiler/clang.cmake @@ -9,36 +9,11 @@ # limitation. 
# -# which compilers to use for C and C++ -if(DEFINED ENV{CPP_CROSS_COMPILE} AND NOT "$ENV{CPP_CROSS_COMPILE}" STREQUAL "") - message(STATUS "Cross compiling: CPP_CROSS_COMPILE = $ENV{CPP_CROSS_COMPILE}") - - set(CROSS_FLAGS --sysroot=${SYSROOT} -B /usr/local/bin -target ${CROSS_TARGET_PLATFORM} -stdlib=libc++) - set(ML_SHARED_LINKER_FLAGS ${CROSS_FLAGS}) - set(ML_EXE_LINKER_FLAGS ${CROSS_FLAGS}) - - # which compilers to use for C and C++ - set(CMAKE_C_COMPILER "clang-8") - set(CMAKE_CXX_COMPILER "clang++-8") - - set(CMAKE_AR "/usr/local/bin/${CROSS_TARGET_PLATFORM}-ar") - set(CMAKE_RANLIB "/usr/local/bin/${CROSS_TARGET_PLATFORM}-ranlib") - set(CMAKE_STRIP "/usr/local/bin/${CROSS_TARGET_PLATFORM}-strip") - set(CMAKE_LD "/usr/local/bin/${CROSS_TARGET_PLATFORM}-ld") - - set(CMAKE_CXX_ARCHIVE_CREATE " -ru ") - - # where is the target environment located - set(CMAKE_FIND_ROOT_PATH /usr/local/sysroot-${CROSS_TARGET_PLATFORM}) -else() - set(CMAKE_C_COMPILER "clang") - set(CMAKE_CXX_COMPILER "clang++") - set(CMAKE_AR "ar") - set(CMAKE_RANLIB "ranlib") - set(CMAKE_STRIP "strip") - - #set(Boost_COMPILER "-clang-darwin13") -endif() +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "clang++") +set(CMAKE_AR "ar") +set(CMAKE_RANLIB "ranlib") +set(CMAKE_STRIP "strip") list(APPEND ML_C_FLAGS diff --git a/dev-tools/docker/README.md b/dev-tools/docker/README.md index 4d5716e083..4e4e9f26c4 100644 --- a/dev-tools/docker/README.md +++ b/dev-tools/docker/README.md @@ -129,24 +129,3 @@ This image is not intended to be built regularly. When changing the ### Build script: dev-tools/docker/build_check_style_image.sh - - -## REPOSITORY: ml-macosx-build - -### VERSION: 18 - -### Comments -A Docker image that can be used to **cross compile** the machine learning -C++ code for Intel macOS - -This image is not intended to be built regularly. When changing the tools -or 3rd party components required to build the machine learning C++ code: - - - 1. increment the version - 2. Change the Dockerfile and build a new image to be -used for subsequent builds on this branch. - 3. Update the version to be used for builds in *dev-tools/docker/macosx_builder/Dockerfile*. - -### Build script: dev-tools/docker/build_macosx_build_image.sh - diff --git a/dev-tools/docker/build_macosx_build_image.sh b/dev-tools/docker/build_macosx_build_image.sh deleted file mode 100755 index 3c915f62da..0000000000 --- a/dev-tools/docker/build_macosx_build_image.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License -# 2.0 and the following additional limitation. Functionality enabled by the -# files subject to the Elastic License 2.0 may only be used in production when -# invoked by an Elasticsearch process with a license key installed that permits -# use of machine learning features. You may not use this file except in -# compliance with the Elastic License 2.0 and the foregoing additional -# limitation. -# - -# Builds the Docker image that can be used to compile the machine learning -# C++ code for Intel macOS -# -# This script is not intended to be run regularly. When changing the tools -# or 3rd party components required to build the machine learning C++ code -# increment the version, change the Dockerfile and build a new image to be -# used for subsequent builds on this branch. Then update the version to be -# used for builds in docker/macosx_builder/Dockerfile. 
- -HOST=docker.elastic.co -ACCOUNT=ml-dev -REPOSITORY=ml-macosx-build -VERSION=19 - -set -e - -cd `dirname $0` - -. ./prefetch_docker_image.sh -CONTEXT=macosx_image -prefetch_docker_base_image $CONTEXT/Dockerfile -docker build --no-cache -t $HOST/$ACCOUNT/$REPOSITORY:$VERSION $CONTEXT -# Get a username and password for this by visiting -# https://docker-auth.elastic.co and allowing it to authenticate against your -# GitHub account -docker login $HOST -docker push $HOST/$ACCOUNT/$REPOSITORY:$VERSION - diff --git a/dev-tools/docker/macosx_builder/Dockerfile b/dev-tools/docker/macosx_builder/Dockerfile deleted file mode 100644 index 74f49698b4..0000000000 --- a/dev-tools/docker/macosx_builder/Dockerfile +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License -# 2.0 and the following additional limitation. Functionality enabled by the -# files subject to the Elastic License 2.0 may only be used in production when -# invoked by an Elasticsearch process with a license key installed that permits -# use of machine learning features. You may not use this file except in -# compliance with the Elastic License 2.0 and the foregoing additional -# limitation. -# - -# Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-macosx-build:19 - -MAINTAINER David Roberts - -# Copy the current Git repository into the container -COPY . /ml-cpp/ - -# Tell the build we want to cross compile -ENV CPP_CROSS_COMPILE macosx - -ENV CMAKE_FLAGS -DCMAKE_TOOLCHAIN_FILE=/ml-cpp/cmake/darwin-x86_64.cmake - -# Pass through any version qualifier (default none) -ARG VERSION_QUALIFIER= - -# Pass through whether this is a snapshot build (default yes if not specified) -ARG SNAPSHOT=yes - -# Pass through ML debug option (default blank) -ARG ML_DEBUG= - -# Run the build -RUN \ - /ml-cpp/dev-tools/docker/docker_entrypoint.sh - diff --git a/dev-tools/docker/macosx_image/Dockerfile b/dev-tools/docker/macosx_image/Dockerfile deleted file mode 100644 index 4f5db531a0..0000000000 --- a/dev-tools/docker/macosx_image/Dockerfile +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License -# 2.0 and the following additional limitation. Functionality enabled by the -# files subject to the Elastic License 2.0 may only be used in production when -# invoked by an Elasticsearch process with a license key installed that permits -# use of machine learning features. You may not use this file except in -# compliance with the Elastic License 2.0 and the foregoing additional -# limitation. 
-# - -FROM ubuntu:20.04 - -# This is basically automating the setup instructions in build-setup/macos_cross_compiled.md - -MAINTAINER David Roberts - -# Make sure apt-get is up to date and required packages are installed -RUN \ - export DEBIAN_FRONTEND=noninteractive && \ - apt-get update && \ - apt-get install --no-install-recommends -y apt-utils automake autogen build-essential bzip2 git gobjc gpg-agent libtool software-properties-common unzip wget zip - -# Install clang -RUN \ - wget --quiet -O - http://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - apt-add-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal main" && \ - apt-get install --no-install-recommends -y clang-8 libclang1-8 libllvm8 llvm-8 llvm-8-runtime - -# Add build dependencies transferred from native Mac build server -RUN \ - mkdir -p /usr/local/sysroot-x86_64-apple-macosx10.14/usr && \ - cd /usr/local/sysroot-x86_64-apple-macosx10.14/usr && \ - wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/usr-x86_64-apple-macosx10.14-10.tar.bz2 | tar jxf - && \ - wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/xcode-x86_64-apple-macosx10.14-1.tar.bz2 | tar jxf - && \ - wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/sdk-x86_64-apple-macosx10.14-1.tar.bz2 | tar jxf - - -# Build cctools-port -RUN \ - git clone https://github.com/tpoechtrager/cctools-port.git && \ - cd cctools-port/cctools && \ - git checkout 949.0.1-ld64-530 && \ - export CC=clang-8 && \ - export CXX=clang++-8 && \ - ./autogen.sh && \ - ./configure --target=x86_64-apple-macosx10.14 --with-llvm-config=/usr/bin/llvm-config-8 && \ - make -j`nproc` && \ - make install && \ - cd ../.. && \ - rm -rf cctools-port - -# Install CMake -# v3.19.2 minimum is required -RUN \ - wget --quiet https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-Linux-x86_64.sh && \ - chmod +x cmake-3.23.2-Linux-x86_64.sh && \ - ./cmake-3.23.2-Linux-x86_64.sh --skip-license --prefix=/usr/local && \ - rm -f cmake-3.23.2-Linux-x86_64.sh - diff --git a/dev-tools/docker_build.sh b/dev-tools/docker_build.sh index 47f1064f91..fc1eac34aa 100755 --- a/dev-tools/docker_build.sh +++ b/dev-tools/docker_build.sh @@ -10,8 +10,7 @@ # limitation. # -# Builds the machine learning C++ code for Linux or macOS in a Docker -# container. +# Builds the machine learning C++ code for Linux in a Docker container. # # The output .zip files are then copied out of the container to the # location in the current repository that they'd be in had they been @@ -20,7 +19,7 @@ # Finally, the Docker container used for the build is deleted. usage() { - echo "Usage: $0 linux|linux_aarch64_cross|linux_aarch64_native|macosx ..." + echo "Usage: $0 linux|linux_aarch64_cross|linux_aarch64_native ..." 
exit 1 } @@ -30,7 +29,7 @@ while [ -n "$1" ] do case "$1" in - linux|linux_aarch64_cross|linux_aarch64_native|macosx) + linux|linux_aarch64_cross|linux_aarch64_native) PLATFORMS="$1 $PLATFORMS" ;; *) diff --git a/dev-tools/strip_binaries.sh b/dev-tools/strip_binaries.sh index eef5933474..ca4d7103fe 100755 --- a/dev-tools/strip_binaries.sh +++ b/dev-tools/strip_binaries.sh @@ -21,13 +21,8 @@ case `uname` in ;; Linux) - if [ "$CPP_CROSS_COMPILE" = macosx ] ; then - EXE_DIR="$ML_APP_NAME.app/Contents/MacOS" - DYNAMIC_LIB_DIR="$ML_APP_NAME.app/Contents/lib" - else - EXE_DIR=bin - DYNAMIC_LIB_DIR=lib - fi + EXE_DIR=bin + DYNAMIC_LIB_DIR=lib ;; esac @@ -91,23 +86,6 @@ case `uname` in strip --strip-unneeded $LIBRARY objcopy --add-gnu-debuglink="$LIBRARY-debug" "$LIBRARY" done - elif [ "$CPP_CROSS_COMPILE" = macosx ] ; then - CROSS_TARGET_PLATFORM=x86_64-apple-macosx10.14 - for PROGRAM in `ls -1d "$EXE_DIR"/* | grep -v '\.dSYM$'` - do - echo "Stripping $PROGRAM" - dsymutil-8 $PROGRAM - /usr/local/bin/$CROSS_TARGET_PLATFORM-strip -u -r $PROGRAM - done - for LIBRARY in `ls -1d "$DYNAMIC_LIB_DIR"/* | grep -v '\.dSYM$'` - do - echo "Stripping $LIBRARY" - case $LIBRARY in - *Ml*) - dsymutil-8 $LIBRARY - esac - /usr/local/bin/$CROSS_TARGET_PLATFORM-strip -x $LIBRARY - done else CROSS_TARGET_PLATFORM=$CPP_CROSS_COMPILE-linux-gnu for PROGRAM in `ls -1 "$EXE_DIR"/* | egrep -v "$EXE_DIR"'/core|-debug$'` diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index deaf4801be..6226cd9d53 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -34,6 +34,7 @@ * Update the PyTorch library to version 2.5.0. (See {ml-pull}2783[#2783], {ml-pull}2778[#2778].) * Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780], {ml-pull}2779[#2779].) +* Drop support for macOS Intel builds. (See {ml-pull}2795[#2795].) == {es} version 8.16.0 From e6837afcc99eb7e4bfcca89cd756cda006fbff84 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 11 Dec 2024 16:33:16 +1300 Subject: [PATCH 19/38] [ML] Windows 2022: Upgrade PyTorch to version 2.5.1 (#2799) (#2801) Update docs and configuration to refer to PyTorch 2.5.1 --- build-setup/windows.md | 6 +++--- dev-tools/download_windows_deps.ps1 | 6 +++--- docs/CHANGELOG.asciidoc | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/build-setup/windows.md b/build-setup/windows.md index 1e27855b19..461d009e8f 100644 --- a/build-setup/windows.md +++ b/build-setup/windows.md @@ -193,7 +193,7 @@ On the "Advanced Options" screen, check "Install for all users" and "Add Python For the time being, do not take advantage of the option on the final installer screen to reconfigure the machine to allow paths longer than 260 characters. We still support Windows versions that do not have this option. -### PyTorch 2.5.0 +### PyTorch 2.5.1 (This step requires a lot of memory. It failed on a machine with 12GB of RAM. It just about fitted on a 20GB machine. 32GB RAM is recommended.) 
@@ -209,7 +209,7 @@ Next, in a Git bash shell run: ``` cd /c/tools -git clone --depth=1 --branch=v2.5.0 https://github.com/pytorch/pytorch.git +git clone --depth=1 --branch=v2.5.1 https://github.com/pytorch/pytorch.git cd pytorch git submodule sync git submodule update --init --recursive @@ -265,7 +265,7 @@ set USE_QNNPACK=OFF set USE_PYTORCH_QNNPACK=OFF set USE_XNNPACK=OFF set MSVC_Z7_OVERRIDE=OFF -set PYTORCH_BUILD_VERSION=2.5.0 +set PYTORCH_BUILD_VERSION=2.5.1 set PYTORCH_BUILD_NUMBER=1 python setup.py install ``` diff --git a/dev-tools/download_windows_deps.ps1 b/dev-tools/download_windows_deps.ps1 index 9a303a484c..dbda289947 100755 --- a/dev-tools/download_windows_deps.ps1 +++ b/dev-tools/download_windows_deps.ps1 @@ -9,11 +9,11 @@ # limitation. # $ErrorActionPreference="Stop" -$Archive="usr-x86_64-windows-2016-14.zip" +$Archive="usr-x86_64-windows-2016-15.zip" $Destination="C:\" -# If PyTorch is not version 2.5.0 then we need the latest download +# If PyTorch is not version 2.5.1 then we need the latest download if (!(Test-Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h") -Or - !(Select-String -Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h" -Pattern "2.5.0" -Quiet)) { + !(Select-String -Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h" -Pattern "2.5.1" -Quiet)) { Remove-Item "$Destination\usr" -Recurse -Force -ErrorAction Ignore $ZipSource="https://storage.googleapis.com/elastic-ml-public/dependencies/$Archive" $ZipDestination="$Env:TEMP\$Archive" diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 6226cd9d53..68be028ef6 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -32,7 +32,7 @@ === Enhancements -* Update the PyTorch library to version 2.5.0. (See {ml-pull}2783[#2783], {ml-pull}2778[#2778].) +* Update the PyTorch library to version 2.5.1. (See {ml-pull}2783[#2783], {ml-pull}2799[#2799].) * Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780], {ml-pull}2779[#2779].) * Drop support for macOS Intel builds. (See {ml-pull}2795[#2795].) From 33147f2849794335ebe4c29115bf094860b77724 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:32:45 +0100 Subject: [PATCH 20/38] [8.x][ML] Update Boost and PyTorch versions for Linux (#2789) Upgrade Boost to version 1.86.0 and PyTorch to version 2.5.1 in the Dockerfile to ensure compatibility with the latest features and improvements. 
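For reference, a minimal way to sanity-check the upgraded dependencies once the
new image has been rebuilt. This is only a sketch: the image tag and install
paths below are taken from the Dockerfiles touched by this change, and it
assumes the torch Python package remains installed in the final image.

```
# Print the PyTorch version compiled into the image (expect 2.5.1)
docker run --rm docker.elastic.co/ml-dev/ml-linux-build:32 \
    /usr/local/bin/python3.10 -c 'import torch; print(torch.__version__)'

# List the Boost libraries installed under the gcc103 prefix
# (expect names containing 1_86, e.g. ...-mt-x64-1_86.so.1.86.0)
docker run --rm docker.elastic.co/ml-dev/ml-linux-build:32 \
    sh -c 'ls /usr/local/gcc103/lib | grep 1_86'
```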
--- .buildkite/pipelines/build_linux.json.py | 4 ++-- 3rd_party/3rd_party.cmake | 6 +++--- build-setup/linux.md | 21 ++++++++++++------- cmake/variables.cmake | 11 +--------- dev-tools/docker/README.md | 8 +++---- .../build_linux_aarch64_cross_build_image.sh | 2 +- .../linux_aarch64_cross_builder/Dockerfile | 2 +- .../linux_aarch64_cross_image/Dockerfile | 2 +- .../linux_aarch64_native_builder/Dockerfile | 2 +- .../linux_aarch64_native_image/Dockerfile | 13 ++++++------ .../linux_aarch64_native_tester/Dockerfile | 2 +- dev-tools/docker/linux_builder/Dockerfile | 2 +- .../linux_dependency_builder_image/Dockerfile | 2 +- dev-tools/docker/linux_image/Dockerfile | 13 ++++++------ dev-tools/docker/linux_tester/Dockerfile | 2 +- 15 files changed, 46 insertions(+), 46 deletions(-) diff --git a/.buildkite/pipelines/build_linux.json.py b/.buildkite/pipelines/build_linux.json.py index 8416a9a042..3178391fbf 100755 --- a/.buildkite/pipelines/build_linux.json.py +++ b/.buildkite/pipelines/build_linux.json.py @@ -36,7 +36,7 @@ "cpu": "6", "ephemeralStorage": "20G", "memory": "64G", - "image": os.getenv("DOCKER_IMAGE", "docker.elastic.co/ml-dev/ml-linux-build:30") + "image": os.getenv("DOCKER_IMAGE", "docker.elastic.co/ml-dev/ml-linux-build:32") }, "aarch64": { "provider": "aws", @@ -101,7 +101,7 @@ def main(args): "cpu": "6", "ephemeralStorage": "20G", "memory": "64G", - "image": "docker.elastic.co/ml-dev/ml-linux-aarch64-cross-build:13" + "image": "docker.elastic.co/ml-dev/ml-linux-aarch64-cross-build:15" }, "commands": [ ".buildkite/scripts/steps/build_and_test.sh" diff --git a/3rd_party/3rd_party.cmake b/3rd_party/3rd_party.cmake index b8154578cd..0abe84e850 100644 --- a/3rd_party/3rd_party.cmake +++ b/3rd_party/3rd_party.cmake @@ -73,7 +73,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") set(MKL_PREFIX "libmkl_") set(MKL_LIBRARIES "avx2" "avx512" "core" "def" "gnu_thread" "intel_lp64" "mc3" "vml_avx2" "vml_avx512" "vml_cmpt" "vml_def" "vml_mc3") endif() - set(BOOST_EXTENSION mt-${BOOST_ARCH}-1_83.so.1.83.0) + set(BOOST_EXTENSION mt-${BOOST_ARCH}-1_86.so.1.86.0) set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION "/usr/local/gcc103/lib") set(XML_EXTENSION ".so.2") @@ -94,7 +94,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") set(SYSROOT "/usr/local/sysroot-x86_64-apple-macosx10.14") set(BOOST_LOCATION "${SYSROOT}/usr/local/lib") set(BOOST_COMPILER "clang") - set(BOOST_EXTENSION "mt-x64-1_83.dylib") + set(BOOST_EXTENSION "mt-x64-1_86.dylib") set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION) set(GCC_RT_LOCATION) @@ -115,7 +115,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") message(FATAL_ERROR "Cannot cross compile to $ENV{CPP_CROSS_COMPILE}") return() endif() - set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_83.so.1.83.0") + set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_86.so.1.86.0") set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION "${SYSROOT}/usr/local/gcc103/lib") set(XML_EXTENSION ".so.2") diff --git a/build-setup/linux.md b/build-setup/linux.md index 1017c1eb94..60b8cd263b 100644 --- a/build-setup/linux.md +++ b/build-setup/linux.md @@ -171,17 +171,17 @@ sudo make install to install. 
-### Boost 1.83.0
+### Boost 1.86.0
-Download version 1.83.0 of Boost from . You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from . You must get this exact version, as the Machine Learning build system requires it.
Assuming you chose the `.bz2` version, extract it to a temporary directory:
```
-bzip2 -cd boost_1_83_0.tar.bz2 | tar xvf -
+bzip2 -cd boost_1_86_0.tar.bz2 | tar xvf -
```
-In the resulting `boost_1_83_0` directory, run:
+In the resulting `boost_1_86_0` directory, run:
```
./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu
@@ -332,7 +332,7 @@ Then copy the shared libraries to the system directory:
(cd /opt/intel/oneapi/mkl/2024.0 && tar cf - lib) | (cd /usr/local/gcc103 && sudo tar xvf -)
```
-### PyTorch 2.3.1
+### PyTorch 2.5.1
(This step requires a reasonable amount of memory. It failed on a machine with 8GB of RAM. It succeeded on a 16GB machine. You can specify the number of parallel jobs using environment variable MAX_JOBS. Lower number of jobs will reduce memory usage.)
@@ -351,7 +351,7 @@ sudo /usr/local/gcc103/bin/python3.10 -m pip install install numpy pyyaml setupt
Then obtain the PyTorch code:
```
-git clone --depth=1 --branch=v2.3.1 git@github.com:pytorch/pytorch.git
+git clone --depth=1 --branch=v2.5.1 git@github.com:pytorch/pytorch.git
cd pytorch
git submodule sync
git submodule update --init --recursive
@@ -365,6 +365,13 @@
a heuristic virus scanner looking for potentially dangerous function calls in
our shipped product will not encounter these functions that run external
processes.
+Edit the file `./third_party/onnx/CMakeLists.txt` and insert the line
+```
+set(PYTHON_EXECUTABLE "/usr/local/bin/python3.10")
+```
+before line 104. This line sets the PYTHON_EXECUTABLE variable to the specified Python
+executable path in the CMake configuration file.
+
Build as follows:
```
@@ -379,7 +386,7 @@ export USE_MKLDNN=ON
export USE_QNNPACK=OFF
export USE_PYTORCH_QNNPACK=OFF
[ $(uname -m) = x86_64 ] && export USE_XNNPACK=OFF
-export PYTORCH_BUILD_VERSION=2.3.1
+export PYTORCH_BUILD_VERSION=2.5.1
export PYTORCH_BUILD_NUMBER=1
/usr/local/gcc103/bin/python3.10 setup.py install
```
diff --git a/cmake/variables.cmake b/cmake/variables.cmake
index 046802cf0d..54a2401a47 100644
--- a/cmake/variables.cmake
+++ b/cmake/variables.cmake
@@ -243,16 +243,7 @@ set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_DEBUG_RUNTIME OFF)
set(Boost_COMPILER "${ML_BOOST_COMPILER_VER}")
-set (Boost_VERSION 1.83.0)
-if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64")
- message(WARNING "Using Boost 1.86.0 on macOS.")
- set (Boost_VERSION 1.86.0)
-endif()
-
-if (CMAKE_SYSTEM_NAME STREQUAL "Windows")
- message(WARNING "Using Boost 1.86.0 on Windows.")
- set(Boost_VERSION 1.86.0)
-endif()
+set(Boost_VERSION 1.86.0)
find_package(Boost ${Boost_VERSION} EXACT REQUIRED COMPONENTS iostreams filesystem program_options regex date_time log log_setup thread unit_test_framework)
if(Boost_FOUND)
list(APPEND ML_SYSTEM_INCLUDE_DIRECTORIES ${Boost_INCLUDE_DIRS})
diff --git a/dev-tools/docker/README.md b/dev-tools/docker/README.md
index 4e4e9f26c4..89314f61cc 100644
--- a/dev-tools/docker/README.md
+++ b/dev-tools/docker/README.md
@@ -36,7 +36,7 @@ required to build the machine learning C++ code dependencies:
2. 
Change the Dockerfile and build a new image to be used for subsequent builds on this branch. 3. Update the version to be used for builds in docker files that refer to it. -### Depends on: ml-linux-build:30 +### Depends on: ml-linux-build:32 ### Build script: dev-tools/docker/build_linux_dependency_builder_image.sh @@ -44,7 +44,7 @@ required to build the machine learning C++ code dependencies: ## Repository: ml-linux-build -### Latest version: 30 +### Latest version: 32 ### Comments A Docker image that can be used to compile the machine learning @@ -63,7 +63,7 @@ used for subsequent builds on this branch. ## Repository: ml-linux-aarch64-cross-build -### Latest version: 12 +### Latest version: 15 ### Comments A Docker image that can be used to compile the machine learning @@ -82,7 +82,7 @@ used for subsequent builds on this branch. ## Repository: ml-linux-aarch64-native-build -### Latest version: 12 +### Latest version: 15 ### Comments A Docker image that can be used to compile the machine learning diff --git a/dev-tools/docker/build_linux_aarch64_cross_build_image.sh b/dev-tools/docker/build_linux_aarch64_cross_build_image.sh index 32263288b8..4289d1c72a 100755 --- a/dev-tools/docker/build_linux_aarch64_cross_build_image.sh +++ b/dev-tools/docker/build_linux_aarch64_cross_build_image.sh @@ -22,7 +22,7 @@ HOST=docker.elastic.co ACCOUNT=ml-dev REPOSITORY=ml-linux-aarch64-cross-build -VERSION=13 +VERSION=15 set -e diff --git a/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile b/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile index 19bc1611a6..4169c321a0 100644 --- a/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile +++ b/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile @@ -10,7 +10,7 @@ # # Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-linux-aarch64-cross-build:13 +FROM docker.elastic.co/ml-dev/ml-linux-aarch64-cross-build:15 MAINTAINER David Roberts diff --git a/dev-tools/docker/linux_aarch64_cross_image/Dockerfile b/dev-tools/docker/linux_aarch64_cross_image/Dockerfile index ffd0d6f289..20559d1bb0 100644 --- a/dev-tools/docker/linux_aarch64_cross_image/Dockerfile +++ b/dev-tools/docker/linux_aarch64_cross_image/Dockerfile @@ -27,7 +27,7 @@ RUN \ RUN \ mkdir -p /usr/local/sysroot-aarch64-linux-gnu/usr && \ cd /usr/local/sysroot-aarch64-linux-gnu/usr && \ - wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/usr-aarch64-linux-gnu-13.tar.bz2 | tar jxf - && \ + wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/usr-aarch64-linux-gnu-15.tar.bz2 | tar jxf - && \ cd .. 
&& \ ln -s usr/lib lib && \ ln -s usr/lib64 lib64 diff --git a/dev-tools/docker/linux_aarch64_native_builder/Dockerfile b/dev-tools/docker/linux_aarch64_native_builder/Dockerfile index 223ab2d974..ec326a4753 100644 --- a/dev-tools/docker/linux_aarch64_native_builder/Dockerfile +++ b/dev-tools/docker/linux_aarch64_native_builder/Dockerfile @@ -10,7 +10,7 @@ # # Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:13 +FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:15 MAINTAINER David Roberts diff --git a/dev-tools/docker/linux_aarch64_native_image/Dockerfile b/dev-tools/docker/linux_aarch64_native_image/Dockerfile index a8c75b5240..e00a5f9b6e 100644 --- a/dev-tools/docker/linux_aarch64_native_image/Dockerfile +++ b/dev-tools/docker/linux_aarch64_native_image/Dockerfile @@ -78,14 +78,14 @@ RUN \ # Build Boost RUN \ cd ${build_dir} && \ - wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.83.0/source/boost_1_83_0.tar.bz2 | tar jxf - && \ - cd boost_1_83_0 && \ + wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \ + cd boost_1_86_0 && \ ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \ - sed -i -e 's|(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|(3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|' boost/unordered/detail/prime_fmod.hpp && \ + sed -i -e 's/{13ul/{3ul, 13ul/' boost/unordered/detail/prime_fmod.hpp&& \ ./b2 -j`nproc` --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -march=armv8-a+crc+crypto' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \ ./b2 install --prefix=/usr/local/gcc103 --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -march=armv8-a+crc+crypto' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \ cd .. 
&& \ - rm -rf boost_1_83_0 + rm -rf boost_1_86_0 # Build patchelf RUN \ @@ -141,11 +141,12 @@ RUN \ # If the PyTorch branch is changed also update PYTORCH_BUILD_VERSION RUN \ cd ${build_dir} && \ - git -c advice.detachedHead=false clone --depth=1 --branch=v2.3.1 https://github.com/pytorch/pytorch.git && \ + git -c advice.detachedHead=false clone --depth=1 --branch=v2.5.1 https://github.com/pytorch/pytorch.git && \ cd pytorch && \ git submodule sync && \ git submodule update --init --recursive && \ sed -i -e 's/system(/strlen(/' torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp && \ + sed -i -e '104 i set(PYTHON_EXECUTABLE "/usr/local/bin/python3.10")' ./third_party/onnx/CMakeLists.txt && \ export BLAS=Eigen && \ export BUILD_TEST=OFF && \ export USE_FBGEMM=OFF && \ @@ -154,7 +155,7 @@ RUN \ export USE_MKLDNN=ON && \ export USE_QNNPACK=OFF && \ export USE_PYTORCH_QNNPACK=OFF && \ - export PYTORCH_BUILD_VERSION=2.3.1 && \ + export PYTORCH_BUILD_VERSION=2.5.1 && \ export PYTORCH_BUILD_NUMBER=1 && \ /usr/local/bin/python3.10 setup.py install && \ mkdir /usr/local/gcc103/include/pytorch && \ diff --git a/dev-tools/docker/linux_aarch64_native_tester/Dockerfile b/dev-tools/docker/linux_aarch64_native_tester/Dockerfile index eb528694e1..3cccfbfc36 100644 --- a/dev-tools/docker/linux_aarch64_native_tester/Dockerfile +++ b/dev-tools/docker/linux_aarch64_native_tester/Dockerfile @@ -10,7 +10,7 @@ # # Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:13 +FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:15 MAINTAINER David Roberts diff --git a/dev-tools/docker/linux_builder/Dockerfile b/dev-tools/docker/linux_builder/Dockerfile index f21591e988..f9f2ecdc0d 100644 --- a/dev-tools/docker/linux_builder/Dockerfile +++ b/dev-tools/docker/linux_builder/Dockerfile @@ -10,7 +10,7 @@ # # Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-linux-build:30 +FROM docker.elastic.co/ml-dev/ml-linux-build:32 MAINTAINER David Roberts diff --git a/dev-tools/docker/linux_dependency_builder_image/Dockerfile b/dev-tools/docker/linux_dependency_builder_image/Dockerfile index 9b57f974c1..33defeecc1 100644 --- a/dev-tools/docker/linux_dependency_builder_image/Dockerfile +++ b/dev-tools/docker/linux_dependency_builder_image/Dockerfile @@ -10,7 +10,7 @@ # # Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-linux-build:30 AS builder +FROM docker.elastic.co/ml-dev/ml-linux-build:32 AS builder # This is basically automating the setup instructions in build-setup/linux.md diff --git a/dev-tools/docker/linux_image/Dockerfile b/dev-tools/docker/linux_image/Dockerfile index 99e957a363..be6a58ca31 100644 --- a/dev-tools/docker/linux_image/Dockerfile +++ b/dev-tools/docker/linux_image/Dockerfile @@ -78,14 +78,14 @@ RUN \ # Build Boost RUN \ cd ${build_dir} && \ - wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.83.0/source/boost_1_83_0.tar.bz2 | tar jxf - && \ - cd boost_1_83_0 && \ + wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \ + cd boost_1_86_0 && \ ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \ - sed -i -e 's|(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( 
\\|(3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|' boost/unordered/detail/prime_fmod.hpp && \ + sed -i -e 's/{13ul/{3ul, 13ul/' boost/unordered/detail/prime_fmod.hpp && \ ./b2 -j`nproc` --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -msse4.2 -mfpmath=sse' cflags='-D__STDC_FORMAT_MACROS' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \ ./b2 install --prefix=/usr/local/gcc103 --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -msse4.2 -mfpmath=sse' cflags='-D__STDC_FORMAT_MACROS' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \ cd .. && \ - rm -rf boost_1_83_0 + rm -rf boost_1_86_0 # Build patchelf RUN \ @@ -154,11 +154,12 @@ gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS. # If the PyTorch branch is changed also update PYTORCH_BUILD_VERSION RUN \ cd ${build_dir} && \ - git -c advice.detachedHead=false clone --depth=1 --branch=v2.3.1 https://github.com/pytorch/pytorch.git && \ + git -c advice.detachedHead=false clone --depth=1 --branch=v2.5.1 https://github.com/pytorch/pytorch.git && \ cd pytorch && \ git submodule sync && \ git submodule update --init --recursive && \ sed -i -e 's/system(/strlen(/' torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp && \ + sed -i -e '104 i set(PYTHON_EXECUTABLE "/usr/local/bin/python3.10")' ./third_party/onnx/CMakeLists.txt && \ export BLAS=MKL && \ export BUILD_TEST=OFF && \ export BUILD_CAFFE2=OFF && \ @@ -168,7 +169,7 @@ RUN \ export USE_QNNPACK=OFF && \ export USE_PYTORCH_QNNPACK=OFF && \ export USE_XNNPACK=OFF && \ - export PYTORCH_BUILD_VERSION=2.3.1 && \ + export PYTORCH_BUILD_VERSION=2.5.1 && \ export PYTORCH_BUILD_NUMBER=1 && \ export MAX_JOBS=10 && \ /usr/local/bin/python3.10 setup.py install && \ diff --git a/dev-tools/docker/linux_tester/Dockerfile b/dev-tools/docker/linux_tester/Dockerfile index b92bafc56c..8c7f6c6eb4 100644 --- a/dev-tools/docker/linux_tester/Dockerfile +++ b/dev-tools/docker/linux_tester/Dockerfile @@ -10,7 +10,7 @@ # # Increment the version here when a new tools/3rd party components image is built -FROM docker.elastic.co/ml-dev/ml-linux-build:30 +FROM docker.elastic.co/ml-dev/ml-linux-build:32 MAINTAINER David Roberts From 1cb32404c4e18b3f4ee96793667ccb0062b8b902 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:58:33 +0100 Subject: [PATCH 21/38] Update linux.md --- build-setup/linux.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/build-setup/linux.md b/build-setup/linux.md index 60b8cd263b..da498ffc0c 100644 --- a/build-setup/linux.md +++ b/build-setup/linux.md @@ -189,16 +189,13 @@ In the resulting `boost_1_86_0` directory, run: This should build the `b2` program, which in turn is used to build Boost. 
-Edit `boost/unordered/detail/prime_fmod.hpp` and change line 134 from: - +Edit `boost/unordered/detail/prime_fmod.hpp` and change line 37 from ``` - (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ + constexpr static std::size_t const sizes[] = {13ul, 29ul, 53ul, 97ul, ``` - to: - ``` - (3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \ + constexpr static std::size_t const sizes[] = {3ul, 13ul, 29ul, 53ul, 97ul, ``` Finally, run: From ea055c6d55e5552eef3ec48a7a22976d76c0885c Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 13 Dec 2024 09:24:05 +1300 Subject: [PATCH 22/38] [8.18][ML] macOS ARM: Upgrade PyTorch to version 2.5.1 (#2798) (#2802) Update docs and build scripts to refer to PyTorch v2.5.1 Backports #2798 --- 3rd_party/licenses/pytorch-INFO.csv | 2 +- build-setup/macos.md | 6 +++--- dev-tools/download_macos_deps.sh | 2 +- docs/CHANGELOG.asciidoc | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/3rd_party/licenses/pytorch-INFO.csv b/3rd_party/licenses/pytorch-INFO.csv index aab5a87478..6750bd1224 100644 --- a/3rd_party/licenses/pytorch-INFO.csv +++ b/3rd_party/licenses/pytorch-INFO.csv @@ -1,2 +1,2 @@ name,version,revision,url,license,copyright,sourceURL -PyTorch,2.5.0,32f585d9346e316e554c8d9bf7548af9f62141fc,https://pytorch.org,BSD-3-Clause,, +PyTorch,2.5.1,a8d6afb511a69687bbb2b7e88a3cf67917e1697e,https://pytorch.org,BSD-3-Clause,, diff --git a/build-setup/macos.md b/build-setup/macos.md index 4609c073ff..079c9476cc 100644 --- a/build-setup/macos.md +++ b/build-setup/macos.md @@ -127,7 +127,7 @@ Download the graphical installer for Python 3.10.9 from Date: Tue, 14 Jan 2025 14:13:25 +1300 Subject: [PATCH 23/38] [ML] Increase the upper limits for the Boost.JSON SAX parser (#2809) (#2813) The maximum size limits for several features of the Boost.JSON SAX style parser are currently set to arbitrary "largeish" values. This PR increases those upper limits to be as large as possible. Closes #2808 --- docs/CHANGELOG.asciidoc | 6 ++++++ include/core/BoostJsonConstants.h | 9 +++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 7eab0eb4bb..eb104f70d9 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -36,6 +36,12 @@ * Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780], {ml-pull}2779[#2779].) * Drop support for macOS Intel builds. (See {ml-pull}2795[#2795].) +== {es} version 8.16.4 + +=== Bug Fixes + +* Increase the upper limits for the Boost.JSON SAX parser. (See {ml-pull}2809[#2809].) 
+
 == {es} version 8.16.0
 
 === Enhancements
diff --git a/include/core/BoostJsonConstants.h b/include/core/BoostJsonConstants.h
index 3bc75f33d9..059f0aa499 100644
--- a/include/core/BoostJsonConstants.h
+++ b/include/core/BoostJsonConstants.h
@@ -13,6 +13,7 @@
 #define INCLUDED_ml_core_CBoostJsonConstants_h
 
 #include <cstddef>
+#include <limits>
 
 namespace ml {
 namespace core {
@@ -21,16 +22,16 @@ namespace boost_json_constants {
 // Constants that set upper limits for Boost.JSON SAX style parsing
 
 // The maximum number of elements allowed in an object
-constexpr std::size_t MAX_OBJECT_SIZE = 1'000'000;
+constexpr std::size_t MAX_OBJECT_SIZE = std::numeric_limits<std::size_t>::max();
 
 // The maximum number of elements allowed in an array
-constexpr std::size_t MAX_ARRAY_SIZE = 1'000'000;
+constexpr std::size_t MAX_ARRAY_SIZE = std::numeric_limits<std::size_t>::max();
 
 // The maximum number of characters allowed in a key
-constexpr std::size_t MAX_KEY_SIZE = 1 << 10;
+constexpr std::size_t MAX_KEY_SIZE = std::numeric_limits<std::size_t>::max();
 
 // The maximum number of characters allowed in a string
-constexpr std::size_t MAX_STRING_SIZE = 1 << 30;
+constexpr std::size_t MAX_STRING_SIZE = std::numeric_limits<std::size_t>::max();
 }
 }
 }

From f79fae8aedd7d2d7a154741152176962fced5fb9 Mon Sep 17 00:00:00 2001
From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com>
Date: Thu, 30 Jan 2025 12:27:19 +0100
Subject: [PATCH 24/38] [ML] Bump version to 8.19.0 (#2816)

Update the Elasticsearch version to 8.19.0
---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index 6783aa7ca2..4e5c2aad61 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.18.0
+elasticsearchVersion=8.19.0
 
 artifactName=ml-cpp

From 649211d139d781d526e161c78eed69fec693a306 Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Fri, 7 Mar 2025 09:05:28 +1300
Subject: [PATCH 25/38] [8.19][ML] Correct handling of config updates (#2821)
 (#2823)

* Enable updating of custom rules

* Correctly update filters (scoped rules)

These changes fix a bug whereby memory corruption was caused when
updating filters for an open job. The bug had previously been masked by
code that reset the job's analysis config to its original state after
every config update - the downside of this was that custom rules could
not be updated on the fly, only by stopping and restarting the job.

Backports #2821
---
 build.gradle | 6 +-
 docs/CHANGELOG.asciidoc | 6 +
 include/api/CAnomalyJob.h | 2 +
 include/api/CAnomalyJobConfig.h | 27 ++-
 lib/api/CAnomalyJob.cc | 2 -
 lib/api/CAnomalyJobConfig.cc | 22 ---
 lib/api/CConfigUpdater.cc | 72 ++++---
 lib/api/unittest/CAnomalyJobConfigTest.cc | 45 -----
 lib/api/unittest/CAnomalyJobTest.cc | 184 ++++++++++++++++++
 lib/api/unittest/CTestAnomalyJob.cc | 22 +++
 lib/api/unittest/CTestAnomalyJob.h | 2 +
 .../testfiles/count_over_ip_config.json | 44 +++++
 lib/api/unittest/testfiles/eventConfig.json | 4 +
 lib/api/unittest/testfiles/filterConfig.json | 11 ++
 14 files changed, 333 insertions(+), 116 deletions(-)
 create mode 100644 lib/api/unittest/testfiles/count_over_ip_config.json
 create mode 100644 lib/api/unittest/testfiles/eventConfig.json
 create mode 100644 lib/api/unittest/testfiles/filterConfig.json

diff --git a/build.gradle b/build.gradle
index f9ae0d56c8..fea0a7da31 100644
--- a/build.gradle
+++ b/build.gradle
@@ -435,7 +435,7 @@ task buildDependencyReport(type: Exec) {
 // This gives us the flexibility to build in different
 // ways and still use the same upload code.
task upload(type: UploadS3Task) { - bucket 'prelert-artifacts' + bucket='prelert-artifacts' // Only upload the platform-specific artifacts in this task def zipFileDir = fileTree("${buildDir}/distributions").matching { include "*-aarch64.zip", "*-x86_64.zip" @@ -447,7 +447,7 @@ task upload(type: UploadS3Task) { } task uploadAll(type: UploadS3Task) { - bucket 'prelert-artifacts' + bucket='prelert-artifacts' // Upload ALL artifacts (including the dependency report) in this task def fileDir = fileTree("${buildDir}/distributions").matching { include "ml-cpp-${project.version}*.zip", "dependencies-${version}.csv" @@ -462,7 +462,7 @@ task uberUpload(type: UploadS3Task, dependsOn: [buildUberZipFromDownloads, buildDependenciesZipFromDownloads, buildNoDependenciesZipFromDownloads, buildDependencyReport]) { - bucket 'prelert-artifacts' + bucket='prelert-artifacts' upload buildUberZipFromDownloads.outputs.files.singleFile, "maven/${artifactGroupPath}/${artifactName}/${project.version}/${buildUberZipFromDownloads.outputs.files.singleFile.name}" upload buildDependenciesZipFromDownloads.outputs.files.singleFile, "maven/${artifactGroupPath}/${artifactName}/${project.version}/${buildDependenciesZipFromDownloads.outputs.files.singleFile.name}" upload buildNoDependenciesZipFromDownloads.outputs.files.singleFile, "maven/${artifactGroupPath}/${artifactName}/${project.version}/${buildNoDependenciesZipFromDownloads.outputs.files.singleFile.name}" diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index eb104f70d9..ef228b7418 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -36,6 +36,12 @@ * Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780], {ml-pull}2779[#2779].) * Drop support for macOS Intel builds. (See {ml-pull}2795[#2795].) +== {es} version 8.16.6 + +=== Bug Fixes + +* Correct handling of config updates. (See {ml-pull}2821[#2821].) + == {es} version 8.16.4 === Bug Fixes diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index e4f1f452cb..fda2d59972 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -37,6 +37,7 @@ namespace CAnomalyJobTest { struct testParsePersistControlMessageArgs; +struct testConfigUpdate; struct testOutputBucketResultsUntilGivenIncompleteInitialBucket; } @@ -521,6 +522,7 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { core_t::TTime m_InitialLastFinalisedBucketEndTime{0}; // Test case access + friend struct CAnomalyJobTest::testConfigUpdate; friend struct CAnomalyJobTest::testParsePersistControlMessageArgs; friend struct CAnomalyJobTest::testOutputBucketResultsUntilGivenIncompleteInitialBucket; diff --git a/include/api/CAnomalyJobConfig.h b/include/api/CAnomalyJobConfig.h index 3fe56a5402..7f9cae0edd 100644 --- a/include/api/CAnomalyJobConfig.h +++ b/include/api/CAnomalyJobConfig.h @@ -239,7 +239,19 @@ class API_EXPORT CAnomalyJobConfig { } void initRuleFilters(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters) { - m_RuleFilters = ruleFilters; + // Update or insert values that are in the new map - we never delete filters at this level. + // Note that we can't simply assign "m_RuleFilters = ruleFilters", as that would result in + // the pattern set objects being destroyed and, as they are referenced by the anomaly detector models, + // this is a bad thing. 
+ for (const auto& kv : ruleFilters) { + CDetectionRulesJsonParser::TStrPatternSetUMap::iterator itr = + m_RuleFilters.find(kv.first); + if (itr != m_RuleFilters.end()) { + itr->second = kv.second; + } else { + m_RuleFilters.insert(kv); + } + } } void initScheduledEvents(const TStrDetectionRulePrVec& scheduledEvents) { @@ -249,19 +261,6 @@ class API_EXPORT CAnomalyJobConfig { //! Parse a JSON value representing an entire analysis config object. void parse(const json::value& json); - //! Return a JSON string representing the analysis config - const std::string& getAnalysisConfig(); - - //! Reparse the detector configuration object from within a stored - //! string representing the analysis config object. - //! This is necessary to correctly reinitialise scoped rule objects - //! folowing an update of the fiter rules configuration. - bool reparseDetectorsFromStoredConfig(const std::string& analysisConfig); - - void setConfig(const std::string& analysisConfigString) { - m_AnalysisConfigString = analysisConfigString; - } - core_t::TTime bucketSpan() const { return m_BucketSpan; } //! Return the size of the model prune window expressed as a whole number of seconds. diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 82757df65e..ad8242867a 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -474,8 +474,6 @@ void CAnomalyJob::updateConfig(const std::string& config) { if (configUpdater.update(config) == false) { LOG_ERROR(<< "Failed to update configuration"); } - const std::string& analysisConfig = m_JobConfig.analysisConfig().getAnalysisConfig(); - m_JobConfig.analysisConfig().reparseDetectorsFromStoredConfig(analysisConfig); } void CAnomalyJob::advanceTime(const std::string& time_) { diff --git a/lib/api/CAnomalyJobConfig.cc b/lib/api/CAnomalyJobConfig.cc index 9d05bc53c6..41cd8791d5 100644 --- a/lib/api/CAnomalyJobConfig.cc +++ b/lib/api/CAnomalyJobConfig.cc @@ -589,7 +589,6 @@ bool CAnomalyJobConfig::parse(const std::string& jsonStr) { auto analysisConfig = parameters[ANALYSIS_CONFIG].jsonObject(); if (analysisConfig != nullptr) { - m_AnalysisConfig.setConfig(toString(*analysisConfig)); m_AnalysisConfig.parse(*analysisConfig); } @@ -724,27 +723,6 @@ void CAnomalyJobConfig::CAnalysisConfig::parseDetectorsConfig(const json::value& } } -const std::string& CAnomalyJobConfig::CAnalysisConfig::getAnalysisConfig() { - return m_AnalysisConfigString; -} - -bool CAnomalyJobConfig::CAnalysisConfig::reparseDetectorsFromStoredConfig(const std::string& analysisConfig) { - json::value doc; - bool ok = core::CBoostJsonParser::parse(analysisConfig, doc); - if (ok == false) { - LOG_ERROR(<< "An error occurred while parsing anomaly job config from JSON: \"" - << analysisConfig << "\""); - return false; - } - - auto parameters = ANALYSIS_CONFIG_READER.read(doc); - auto detectorsConfig = parameters[DETECTORS].jsonObject(); - if (detectorsConfig != nullptr) { - this->parseDetectorsConfig(*detectorsConfig); - } - return true; -} - void CAnomalyJobConfig::CAnalysisConfig::parse(const json::value& analysisConfig) { auto parameters = ANALYSIS_CONFIG_READER.read(analysisConfig); // We choose to ignore any errors here parsing the time duration string as diff --git a/lib/api/CConfigUpdater.cc b/lib/api/CConfigUpdater.cc index 17cb847572..dc3ae7e288 100644 --- a/lib/api/CConfigUpdater.cc +++ b/lib/api/CConfigUpdater.cc @@ -41,41 +41,53 @@ bool CConfigUpdater::update(const std::string& json) { } json::object obj = doc.as_object(); + for (const auto& kv : obj) { + if (kv.key() == 
CAnomalyJobConfig::MODEL_PLOT_CONFIG) { + LOG_TRACE(<< "Updating model plot config"); - if (obj.contains(CAnomalyJobConfig::MODEL_PLOT_CONFIG)) { - if (obj[CAnomalyJobConfig::MODEL_PLOT_CONFIG].is_object() == false) { - LOG_ERROR(<< "Input error: expected " << CAnomalyJobConfig::MODEL_PLOT_CONFIG - << " to be JSON object but input was '" << json - << "'. Please report this problem."); - return false; - } - const json::value& value = obj[CAnomalyJobConfig::MODEL_PLOT_CONFIG]; + if (kv.value().is_object() == false) { + LOG_ERROR(<< "Input error: expected " << CAnomalyJobConfig::MODEL_PLOT_CONFIG + << " to be JSON object but input was '" << json + << "'. Please report this problem."); + return false; + } - m_JobConfig.modelPlotConfig().parse(value); - const ml::api::CAnomalyJobConfig::CModelPlotConfig& modelPlotConfig = - m_JobConfig.modelPlotConfig(); - m_ModelConfig.configureModelPlot(modelPlotConfig.enabled(), - modelPlotConfig.annotationsEnabled(), - modelPlotConfig.terms()); - } else if (obj.contains(CAnomalyJobConfig::FILTERS)) { - if (m_JobConfig.parseFilterConfig(json) == false) { - LOG_ERROR(<< "Failed to parse filter config update: " << json); - return false; - } - m_JobConfig.initRuleFilters(); - } else if (obj.contains(CAnomalyJobConfig::EVENTS)) { - if (m_JobConfig.parseEventConfig(json) == false) { - LOG_ERROR(<< "Failed to parse events config update: " << json); + m_JobConfig.modelPlotConfig().parse(kv.value()); + const ml::api::CAnomalyJobConfig::CModelPlotConfig& modelPlotConfig = + m_JobConfig.modelPlotConfig(); + m_ModelConfig.configureModelPlot(modelPlotConfig.enabled(), + modelPlotConfig.annotationsEnabled(), + modelPlotConfig.terms()); + } else if (kv.key() == CAnomalyJobConfig::FILTERS) { + LOG_TRACE(<< "Updating filters config"); + + if (m_JobConfig.parseFilterConfig(json) == false) { + LOG_ERROR(<< "Failed to parse filter config update: " << json); + return false; + } + LOG_TRACE(<< "Calling m_JobConfig.initRuleFilters"); + + m_JobConfig.initRuleFilters(); + + LOG_TRACE(<< "Done calling m_JobConfig.initRuleFilters"); + + } else if (kv.key() == CAnomalyJobConfig::EVENTS) { + LOG_TRACE(<< "Updating events config"); + + if (m_JobConfig.parseEventConfig(json) == false) { + LOG_ERROR(<< "Failed to parse events config update: " << json); + return false; + } + m_JobConfig.initScheduledEvents(); + } else if (kv.key() == CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES) { + LOG_TRACE(<< "Updating detector rules config"); + return m_JobConfig.analysisConfig().parseRulesUpdate(kv.value()); + } else { + LOG_ERROR(<< "Unexpected JSON update message: " << json); return false; } - m_JobConfig.initScheduledEvents(); - } else if (obj.contains(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES)) { - return m_JobConfig.analysisConfig().parseRulesUpdate( - obj[CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES]); - } else { - LOG_ERROR(<< "Unexpected JSON update message: " << json); - return false; } + return true; } } diff --git a/lib/api/unittest/CAnomalyJobConfigTest.cc b/lib/api/unittest/CAnomalyJobConfigTest.cc index 3699baa6f4..78b8fdcb07 100644 --- a/lib/api/unittest/CAnomalyJobConfigTest.cc +++ b/lib/api/unittest/CAnomalyJobConfigTest.cc @@ -57,51 +57,6 @@ BOOST_AUTO_TEST_CASE(testIntervalStagger) { BOOST_REQUIRE_EQUAL(job3Config.intervalStagger(), job1Config.intervalStagger()); } -BOOST_AUTO_TEST_CASE(testReparseDetectorsFromStoredConfig) { - const std::string validAnomalyJobConfigWithCustomRuleFilter{ - 
"{\"job_id\":\"mean_bytes_by_clientip\",\"job_type\":\"anomaly_detector\",\"job_version\":\"8.0.0\",\"create_time\":1604671135245,\"description\":\"mean bytes by clientip\"," - "\"analysis_config\":{\"bucket_span\":\"3h\",\"detectors\":[{\"detector_description\":\"mean(bytes) by clientip\",\"function\":\"mean\",\"field_name\":\"bytes\",\"by_field_name\":\"clientip\"," - "\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"clientip\":{\"filter_id\":\"safe_ips\",\"filter_type\":\"include\"}},\"conditions\":[{\"applies_to\":\"actual\",\"operator\":\"lt\",\"value\":10.0}]}]," - "\"detector_index\":0}],\"influencers\":[\"clientip\"]},\"analysis_limits\":{\"model_memory_limit\":\"42mb\",\"categorization_examples_limit\":4}," - "\"data_description\":{\"time_field\":\"timestamp\",\"time_format\":\"epoch_ms\"},\"model_plot_config\":{\"enabled\":false,\"annotations_enabled\":false}," - "\"model_snapshot_retention_days\":10,\"daily_model_snapshot_retention_after_days\":1,\"results_index_name\":\"shared\",\"allow_lazy_open\":false}"}; - - // Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map. - const std::string filterConfigJson{"{\"filters\":[{\"filter_id\":\"safe_ips\",\"items\":[]}]}"}; - ml::api::CAnomalyJobConfig jobConfig; - BOOST_TEST_REQUIRE(jobConfig.parseFilterConfig(filterConfigJson)); - - const std::string validScheduledEventsConfigJson{"{\"events\":[" - "]}"}; - - BOOST_TEST_REQUIRE(jobConfig.parseEventConfig(validScheduledEventsConfigJson)); - - jobConfig.analysisConfig().init(jobConfig.ruleFilters(), jobConfig.scheduledEvents()); - - BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter), - "Cannot parse JSON job config!"); - BOOST_TEST_REQUIRE(jobConfig.isInitialized()); - - // Expect parsing to fail if the analysis config JSON string is invalid - const std::string inValidAnalysisConfigString{"{\"bucket_span\":\"1h\""}; - BOOST_TEST_REQUIRE(!jobConfig.analysisConfig().reparseDetectorsFromStoredConfig( - inValidAnalysisConfigString)); - - // Expect parsing to fail if the filter referenced by the custom rule cannot be found - const std::string validAnalysisConfigStringWithUnknownFilter{ - "{\"bucket_span\":\"1h\",\"detectors\":[{\"detector_description\":\"count over ip\",\"function\":\"count\",\"over_field_name\":\"ip\",\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"ip\":{\"filter_id\":\"unknown_filter\",\"filter_type\":\"include\"}}}],\"detector_index\":0}],\"influencers\":[],\"model_prune_window\":\"30d\"}"}; - BOOST_REQUIRE_EXCEPTION( - jobConfig.analysisConfig().reparseDetectorsFromStoredConfig(validAnalysisConfigStringWithUnknownFilter), - ml::api::CAnomalyJobConfigReader::CParseError, - [](ml::api::CAnomalyJobConfigReader::CParseError const&) { return true; }); - - // Expect parsing to succeed if the filter referenced by the custom rule is registered. 
-    const std::string validAnalysisConfigString{
-        "{\"bucket_span\":\"1h\",\"detectors\":[{\"detector_description\":\"count over ip\",\"function\":\"count\",\"over_field_name\":\"ip\",\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"ip\":{\"filter_id\":\"safe_ips\",\"filter_type\":\"include\"}}}],\"detector_index\":0}],\"influencers\":[],\"model_prune_window\":\"30d\"}"};
-    BOOST_TEST_REQUIRE(jobConfig.analysisConfig().reparseDetectorsFromStoredConfig(
-        validAnalysisConfigString));
-}
-
 BOOST_AUTO_TEST_CASE(testParse) {
     using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig;
 
diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc
index 02d5925483..0de490e0e5 100644
--- a/lib/api/unittest/CAnomalyJobTest.cc
+++ b/lib/api/unittest/CAnomalyJobTest.cc
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include <random>
 #include
 
 BOOST_TEST_DONT_PRINT_LOG_VALUE(json::array::const_iterator)
@@ -186,6 +187,9 @@ bool findLine(const std::string& regex, const ml::core::CRegex::TStrVec& lines)
 }
 
 const ml::core_t::TTime BUCKET_SIZE(3600);
+
+using TStrStrPr = std::pair<std::string, std::string>;
+using TStrStrPrVec = std::vector<TStrStrPr>;
 }
 
 using namespace ml;
@@ -851,6 +855,186 @@ BOOST_AUTO_TEST_CASE(testRestoreFailsWithEmptyStream) {
     BOOST_TEST_REQUIRE(job.restoreState(restoreSearcher, completeToTime) == false);
 }
 
+BOOST_AUTO_TEST_CASE(testConfigUpdate) {
+    // This, in part, is essentially replicating the DetectionRulesIT/testScope Java REST test.
+    // It proves useful to have the test here too, as it provides an entrypoint for investigating
+    // any issues related to filters, especially when updating them when already referenced by anomaly detector models.
+    // We simply want to see the job run to completion.
+    ml::api::CAnomalyJobConfig jobConfig;
+    BOOST_REQUIRE_EQUAL(true, jobConfig.initFromFiles("testfiles/count_over_ip_config.json",
+                                                      "testfiles/filterConfig.json",
+                                                      "testfiles/eventConfig.json"));
+
+    const ml::api::CAnomalyJobConfig::CAnalysisConfig& analysisConfig =
+        jobConfig.analysisConfig();
+
+    model::CLimits limits;
+
+    model::CAnomalyDetectorModelConfig modelConfig = analysisConfig.makeModelConfig();
+    std::stringstream outputStrm;
+    core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);
+
+    CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream);
+
+    auto generateRandomAlpha = [](int strLen) {
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_int_distribution dis(0, 25);
+
+        std::string str;
+        for (int i = 0; i < strLen; ++i) {
+            str += char('a' + dis(gen));
+        }
+        return str;
+    };
+
+    long timestamp = 1509062400000L;
+    TStrStrPrVec data;
+
+    for (int bucket = 0; bucket < 20; bucket++) {
+        for (int i = 0; i < 5; i++) {
+            data.emplace_back(core::CStringUtils::typeToString(timestamp),
+                              generateRandomAlpha(10));
+        }
+        timestamp += 3600 * 1000;
+    }
+
+    // Now send anomalous counts for our filtered IPs plus 333.333.333.333
+    auto namedIps = std::vector<std::string>{"111.111.111.111", "222.222.222.222", "333.333.333.333"};
+    for (int i = 0; i < 10; i++) {
+        for (auto& ip : namedIps) {
+            data.emplace_back(core::CStringUtils::typeToString(timestamp), ip);
+        }
+    }
+
+    for (int bucket = 0; bucket < 3; bucket++) {
+        for (int i = 0; i < 5; i++) {
+            data.emplace_back(core::CStringUtils::typeToString(timestamp),
+                              generateRandomAlpha(10));
+        }
+        timestamp += 3600 * 1000;
+    }
+
+    CTestAnomalyJob::TStrStrUMap dataRows;
+
+    for (const auto & [ time, ip ] : data) {
+        dataRows["time"] = time;
+        dataRows["ip"] = ip;
+
BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + } + + BOOST_REQUIRE_EQUAL(145, job.numRecordsHandled()); + + const std::string& detectorConfig1{R"( + { + "filters":[{"filter_id":"safe_ips", "items":["111.111.111.111","222.222.222.222"]}], + "events":[{"description":"event_1", "rules":[{"actions":["skip_result","skip_model_update"],"conditions":[{"applies_to":"time","operator":"gte","value": 1.0},{"applies_to":"time","operator":"lt","value": 2.0}]}]}], + "model_plot_config":{"enabled":true,"annotations_enabled":false}, + "detector_rules":{"detector_index":0,"custom_rules":[{"actions":["skip_result"],"conditions":[{"applies_to":"actual","operator":"gte","value":15.0},{"applies_to":"actual","operator":"lte","value":30.0}]}]} + } + )"}; + + job.updateConfig(detectorConfig1); + + BOOST_REQUIRE_EQUAL(1, jobConfig.analysisConfig().detectionRules().size()); + auto itr = jobConfig.analysisConfig().detectionRules().find(0); + BOOST_REQUIRE_EQUAL(1, itr->second.size()); + std::string rule{itr->second[0].print()}; + BOOST_REQUIRE_EQUAL( + std::string("SKIP_RESULT IF ACTUAL >= 15.000000 AND ACTUAL <= 30.000000"), rule); + + api::CAnomalyJobConfig::CModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); + BOOST_REQUIRE_EQUAL(false, modelPlotConfig.annotationsEnabled()); + BOOST_REQUIRE_EQUAL(true, modelPlotConfig.enabled()); + + auto events = jobConfig.analysisConfig().scheduledEvents(); + BOOST_REQUIRE_EQUAL(1, events.size()); + BOOST_REQUIRE_EQUAL(std::string("event_1"), events[0].first); + BOOST_REQUIRE_EQUAL(std::string("SKIP_RESULT AND SKIP_MODEL_UPDATE IF TIME >= 1.000000 AND TIME < 2.000000"), + events[0].second.print()); + + auto ruleFilters = jobConfig.ruleFilters(); + BOOST_REQUIRE_EQUAL(1, ruleFilters.size()); + + BOOST_REQUIRE_EQUAL(true, ruleFilters["safe_ips"].contains("111.111.111.111")); + BOOST_REQUIRE_EQUAL(true, ruleFilters["safe_ips"].contains("222.222.222.222")); + BOOST_REQUIRE_EQUAL(false, ruleFilters["safe_ips"].contains("333.333.333.333")); + + const std::string& detectorConfig2{R"( + { + "filters":[{"filter_id":"safe_ips", "items":["333.333.333.333"]}], + "events":[{"description":"event_1", "rules":[{"actions":["skip_result","skip_model_update"],"conditions":[{"applies_to":"time","operator":"gte","value": 2.0},{"applies_to":"time","operator":"lt","value": 3.0}]}]}], + "model_plot_config":{"enabled":false,"annotations_enabled":true}, + "detector_rules":{"detector_index":0,"custom_rules":[{"actions":["skip_result"],"conditions":[{"applies_to":"typical","operator":"gte","value":10.0},{"applies_to":"typical","operator":"lte","value":50.0}]}]} + })"}; + + job.updateConfig(detectorConfig2); + + data.clear(); + // Send another anomalous bucket + for (int i = 0; i < 10; i++) { + for (auto& ip : namedIps) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), ip); + } + } + + // Some more normal buckets + for (int bucket = 0; bucket < 3; bucket++) { + for (int i = 0; i < 5; i++) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), + generateRandomAlpha(10)); + } + timestamp += 3600 * 1000; + } + + dataRows.clear(); + for (const auto & [ time, ip ] : data) { + dataRows["time"] = time; + dataRows["ip"] = ip; + BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + } + + BOOST_REQUIRE_EQUAL(190, job.numRecordsHandled()); + + BOOST_REQUIRE_EQUAL(1, jobConfig.analysisConfig().detectionRules().size()); + itr = jobConfig.analysisConfig().detectionRules().find(0); + BOOST_REQUIRE_EQUAL(1, itr->second.size()); + rule = itr->second[0].print(); + 
BOOST_REQUIRE_EQUAL( + std::string("SKIP_RESULT IF TYPICAL >= 10.000000 AND TYPICAL <= 50.000000"), rule); + + modelPlotConfig = jobConfig.modelPlotConfig(); + BOOST_REQUIRE_EQUAL(true, modelPlotConfig.annotationsEnabled()); + BOOST_REQUIRE_EQUAL(false, modelPlotConfig.enabled()); + + events = jobConfig.analysisConfig().scheduledEvents(); + BOOST_REQUIRE_EQUAL(1, events.size()); + BOOST_REQUIRE_EQUAL(std::string("event_1"), events[0].first); + BOOST_REQUIRE_EQUAL(std::string("SKIP_RESULT AND SKIP_MODEL_UPDATE IF TIME >= 2.000000 AND TIME < 3.000000"), + events[0].second.print()); + + ruleFilters = jobConfig.ruleFilters(); + BOOST_REQUIRE_EQUAL(1, ruleFilters.size()); + + BOOST_REQUIRE_EQUAL(false, ruleFilters["safe_ips"].contains("111.111.111.111")); + BOOST_REQUIRE_EQUAL(false, ruleFilters["safe_ips"].contains("222.222.222.222")); + BOOST_REQUIRE_EQUAL(true, ruleFilters["safe_ips"].contains("333.333.333.333")); + + job.finalise(); + wrappedOutputStream.syncFlush(); + + std::string output = outputStrm.str(); + LOG_TRACE(<< "Output has yielded: " << output); + + // check that the quantile state has actually been persisted + core::CRegex regex; + regex.init("\n"); + core::CRegex::TStrVec lines; + regex.split(output, lines); + BOOST_REQUIRE_EQUAL( + true, findLine("\"quantiles\":{\"job_id\":\"job\",\"quantile_state\".*", lines)); +} + BOOST_AUTO_TEST_CASE(testParsePersistControlMessageArgs) { { const ml::core_t::TTime expectedSnapshotTimestamp{1283524206}; diff --git a/lib/api/unittest/CTestAnomalyJob.cc b/lib/api/unittest/CTestAnomalyJob.cc index 89e139fc3e..5a3f678932 100644 --- a/lib/api/unittest/CTestAnomalyJob.cc +++ b/lib/api/unittest/CTestAnomalyJob.cc @@ -50,3 +50,25 @@ CTestAnomalyJob::makeSimpleJobConfig(const std::string& functionName, influencers, summaryCountFieldName); return jobConfig; } + +ml::api::CAnomalyJobConfig CTestAnomalyJob::makeJobConfig(const std::string& detectorsConfig) { + json::parser p; + boost::system::error_code ec; + p.write_some(detectorsConfig, ec); + if (ec) { + LOG_ERROR(<< "An error occurred while parsing JSON: " << ec.message()); + return {}; + } + json::value doc = p.release(); + if (doc.is_object() == false) { + LOG_ERROR(<< "Input error: expected JSON object but input was '" + << detectorsConfig << "'. 
Please report this problem.");
+        return {};
+    }
+
+    json::object obj = doc.as_object();
+
+    ml::api::CAnomalyJobConfig jobConfig;
+    jobConfig.analysisConfig().parseDetectorsConfig(obj);
+    return jobConfig;
+}
\ No newline at end of file
diff --git a/lib/api/unittest/CTestAnomalyJob.h b/lib/api/unittest/CTestAnomalyJob.h
index c794d7e17c..01187dfcdf 100644
--- a/lib/api/unittest/CTestAnomalyJob.h
+++ b/lib/api/unittest/CTestAnomalyJob.h
@@ -55,6 +55,8 @@ class CTestAnomalyJob : public ml::api::CAnomalyJob {
                            const std::string& partitionFieldName,
                            const TStrVec& influencers = {},
                            const std::string& summaryCountFieldName = "");
+
+    static ml::api::CAnomalyJobConfig makeJobConfig(const std::string& detectorsConfig);
 };
 
 #endif // INCLUDED_CTestAnomalyJob_h
diff --git a/lib/api/unittest/testfiles/count_over_ip_config.json b/lib/api/unittest/testfiles/count_over_ip_config.json
new file mode 100644
index 0000000000..170afb1925
--- /dev/null
+++ b/lib/api/unittest/testfiles/count_over_ip_config.json
@@ -0,0 +1,44 @@
+{
+    "job_id": "detection-rules-it-test-scope",
+    "job_type": "anomaly_detector",
+    "job_version": "12.0.0",
+    "create_time": 1739482196563,
+    "analysis_config": {
+        "bucket_span": "1h",
+        "detectors": [
+            {
+                "detector_description": "count over ip",
+                "function": "count",
+                "over_field_name": "ip",
+                "custom_rules": [
+                    {
+                        "actions": [
+                            "skip_result"
+                        ],
+                        "scope": {
+                            "ip": {
+                                "filter_id": "safe_ips",
+                                "filter_type": "include"
+                            }
+                        }
+                    }
+                ],
+                "detector_index": 0
+            }
+        ],
+        "influencers": [],
+        "model_prune_window": "30d"
+    },
+    "analysis_limits": {
+        "model_memory_limit": "1024mb",
+        "categorization_examples_limit": 4
+    },
+    "data_description": {
+        "time_field": "time",
+        "time_format": "epoch_ms"
+    },
+    "model_snapshot_retention_days": 10,
+    "daily_model_snapshot_retention_after_days": 1,
+    "results_index_name": "shared",
+    "allow_lazy_open": false
+}
diff --git a/lib/api/unittest/testfiles/eventConfig.json b/lib/api/unittest/testfiles/eventConfig.json
new file mode 100644
index 0000000000..c14d27bb64
--- /dev/null
+++ b/lib/api/unittest/testfiles/eventConfig.json
@@ -0,0 +1,4 @@
+{
+    "events": [
+    ]
+}
diff --git a/lib/api/unittest/testfiles/filterConfig.json b/lib/api/unittest/testfiles/filterConfig.json
new file mode 100644
index 0000000000..682f3d451d
--- /dev/null
+++ b/lib/api/unittest/testfiles/filterConfig.json
@@ -0,0 +1,11 @@
+{
+    "filters": [
+        {
+            "filter_id": "safe_ips",
+            "items": [
+                "111.111.111.111",
+                "222.222.222.222"
+            ]
+        }
+    ]
+}

From a5050a970c27450ba91a5ca56863ad6ee5bfb39a Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Wed, 19 Mar 2025 16:13:27 +1300
Subject: [PATCH 26/38] [8.19][ML] Update location of boost libraries repo
 (#2829) (#2836)

The Boost repository is no longer hosted by jfrog. Update scripts,
docs, Dockerfiles etc to reference the new location.

Backports #2829
---
 build-setup/linux.md | 2 +-
 build-setup/macos.md | 2 +-
 build-setup/windows.md | 2 +-
 dev-tools/docker/linux_aarch64_native_image/Dockerfile | 2 +-
 dev-tools/docker/linux_image/Dockerfile | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/build-setup/linux.md b/build-setup/linux.md
index da498ffc0c..adcc5d1a0f 100644
--- a/build-setup/linux.md
+++ b/build-setup/linux.md
@@ -173,7 +173,7 @@ to install.
 
 ### Boost 1.86.0
 
-Download version 1.86.0 of Boost from <https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/>. You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from <https://archives.boost.io/release/1.86.0/source/>. You must get this exact version, as the Machine Learning build system requires it.
 Assuming you chose the `.bz2` version, extract it to a temporary directory:
 
diff --git a/build-setup/macos.md b/build-setup/macos.md
index 079c9476cc..6f6b2afaeb 100644
--- a/build-setup/macos.md
+++ b/build-setup/macos.md
@@ -68,7 +68,7 @@ at the command prompt.
 
 ### Boost 1.86.0
 
-Download version 1.86.0 of Boost from <https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/>. You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from <https://archives.boost.io/release/1.86.0/source/>. You must get this exact version, as the Machine Learning build system requires it.
 
 Assuming you chose the `.bz2` version, extract it to a temporary directory:
 
diff --git a/build-setup/windows.md b/build-setup/windows.md
index 461d009e8f..f08820e9e6 100644
--- a/build-setup/windows.md
+++ b/build-setup/windows.md
@@ -121,7 +121,7 @@ nmake install
 
 ### Boost 1.86.0
 
-Download version 1.86.0 of Boost from <https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/>. You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from <https://archives.boost.io/release/1.86.0/source/>. You must get this exact version, as the Machine Learning build system requires it.
 
 Assuming you chose the `.bz2` version, extract it in a Git bash shell using the GNU tar that comes with Git for Windows, e.g.:
 
diff --git a/dev-tools/docker/linux_aarch64_native_image/Dockerfile b/dev-tools/docker/linux_aarch64_native_image/Dockerfile
index e00a5f9b6e..7f533f2249 100644
--- a/dev-tools/docker/linux_aarch64_native_image/Dockerfile
+++ b/dev-tools/docker/linux_aarch64_native_image/Dockerfile
@@ -78,7 +78,7 @@ RUN \
 # Build Boost
 RUN \
     cd ${build_dir} && \
-    wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \
+    wget --quiet -O - https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \
     cd boost_1_86_0 && \
     ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \
     sed -i -e 's/{13ul/{3ul, 13ul/' boost/unordered/detail/prime_fmod.hpp&& \
diff --git a/dev-tools/docker/linux_image/Dockerfile b/dev-tools/docker/linux_image/Dockerfile
index be6a58ca31..65ba958779 100644
--- a/dev-tools/docker/linux_image/Dockerfile
+++ b/dev-tools/docker/linux_image/Dockerfile
@@ -78,7 +78,7 @@ RUN \
 # Build Boost
 RUN \
     cd ${build_dir} && \
-    wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \
+    wget --quiet -O - https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \
     cd boost_1_86_0 && \
     ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \
     sed -i -e 's/{13ul/{3ul, 13ul/' boost/unordered/detail/prime_fmod.hpp && \

From 36348a428d1631fe20e84a70e338f6933ac55e87 Mon Sep 17 00:00:00 2001
From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com>
Date: Tue, 25 Mar 2025 09:08:57 +0100
Subject: [PATCH 27/38] [ML] Track memory usage in CHierarchicalResultsNormalizer
 (#2831) (#2839)

Backport of #2831
---
 bin/normalize/Main.cc | 7 +++-
 docs/CHANGELOG.asciidoc | 7 ++++
 include/api/CAnomalyJob.h | 2 +
 include/api/CResultNormalizer.h | 3 +-
 .../model/CHierarchicalResultsAggregator.h | 2 +
 include/model/CHierarchicalResultsLevelSet.h | 27 ++++++++++++
 .../model/CHierarchicalResultsNormalizer.h | 35 +++++++++++++---
 include/model/CResourceMonitor.h | 6 +--
 lib/api/CAnomalyJob.cc | 5 ++-
 lib/api/CResultNormalizer.cc | 5 ++-
lib/api/unittest/CAnomalyJobTest.cc | 22 ++++++++++ lib/api/unittest/CJsonOutputWriterTest.cc | 4 +- lib/api/unittest/CRestorePreviousStateTest.cc | 3 +- lib/api/unittest/CResultNormalizerTest.cc | 7 +++- lib/api/unittest/CTestAnomalyJob.h | 4 ++ lib/model/CHierarchicalResultsAggregator.cc | 1 + lib/model/CHierarchicalResultsNormalizer.cc | 41 +++++++++++++++++-- .../CHierarchicalResultsLevelSetTest.cc | 28 +++++++++++++ .../unittest/CHierarchicalResultsTest.cc | 9 ++-- lib/model/unittest/CMakeLists.txt | 2 +- 20 files changed, 194 insertions(+), 26 deletions(-) diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc index f8cbe36501..9aa8e94895 100644 --- a/bin/normalize/Main.cc +++ b/bin/normalize/Main.cc @@ -27,6 +27,7 @@ #include +#include #include #include #include @@ -144,8 +145,12 @@ int main(int argc, char** argv) { ioMgr.outputStream()); }()}; + // Initialize memory limits with default values. + // This is fine as the normalizer doesn't use the memory limit. + ml::model::CLimits limits{false}; + // This object will do the work - ml::api::CResultNormalizer normalizer{modelConfig, *outputWriter}; + ml::api::CResultNormalizer normalizer{modelConfig, *outputWriter, limits}; // Restore state if (!quantilesStateFile.empty()) { diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index ef228b7418..3de30494a0 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,13 @@ //=== Regressions +== {es} version 8.19.0 + +=== Enhancements + +* Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) + + == {es} version 8.18.0 === Enhancements diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index fda2d59972..424b065339 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -428,6 +428,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! be pruned, i.e. those which are so old as to be effectively dead. void pruneAllModels(std::size_t buckets = 0); + const model::CHierarchicalResultsNormalizer& normalizer() const; + private: //! The job ID std::string m_JobId; diff --git a/include/api/CResultNormalizer.h b/include/api/CResultNormalizer.h index b4fbf6ddac..276b8196e3 100644 --- a/include/api/CResultNormalizer.h +++ b/include/api/CResultNormalizer.h @@ -81,7 +81,8 @@ class API_EXPORT CResultNormalizer { public: CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, - CSimpleOutputWriter& outputWriter); + CSimpleOutputWriter& outputWriter, + model::CLimits& limits); //! 
Initialise the system change normalizer bool initNormalizer(const std::string& stateFileName); diff --git a/include/model/CHierarchicalResultsAggregator.h b/include/model/CHierarchicalResultsAggregator.h index a83d01ae61..698fddd729 100644 --- a/include/model/CHierarchicalResultsAggregator.h +++ b/include/model/CHierarchicalResultsAggregator.h @@ -12,6 +12,8 @@ #ifndef INCLUDED_ml_model_CHierarchicalResultsAggregator_h #define INCLUDED_ml_model_CHierarchicalResultsAggregator_h +#include + #include #include #include diff --git a/include/model/CHierarchicalResultsLevelSet.h b/include/model/CHierarchicalResultsLevelSet.h index 3083ca19d3..48da35f406 100644 --- a/include/model/CHierarchicalResultsLevelSet.h +++ b/include/model/CHierarchicalResultsLevelSet.h @@ -12,6 +12,7 @@ #ifndef INCLUDED_ml_model_CHierarchicalResultsLevelSet_h #define INCLUDED_ml_model_CHierarchicalResultsLevelSet_h +#include "model/ImportExport.h" #include #include @@ -21,6 +22,10 @@ #include +namespace CHierarchicalResultsLevelSetTest { +struct testMemoryUsage; +} + namespace ml { namespace model { @@ -240,6 +245,26 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { return maths::common::CChecksum::calculate(seed, m_LeafSet); } + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName("Hierarchical Results Level Set Memory Usage"); + core::memory_debug::dynamicSize("m_BucketElement", m_BucketElement, mem); + core::memory_debug::dynamicSize("m_InfluencerBucketSet", m_InfluencerBucketSet, mem); + core::memory_debug::dynamicSize("m_InfluencerSet", m_InfluencerSet, mem); + core::memory_debug::dynamicSize("m_PartitionSet", m_PartitionSet, mem); + core::memory_debug::dynamicSize("m_PersonSet", m_PersonSet, mem); + core::memory_debug::dynamicSize("m_LeafSet", m_LeafSet, mem); + } + + std::size_t memoryUsage() const { + std::size_t mem = core::memory::dynamicSize(m_BucketElement); + mem += core::memory::dynamicSize(m_InfluencerBucketSet); + mem += core::memory::dynamicSize(m_InfluencerSet); + mem += core::memory::dynamicSize(m_PartitionSet); + mem += core::memory::dynamicSize(m_PersonSet); + mem += core::memory::dynamicSize(m_LeafSet); + return mem; + } + private: //! Get an element of \p set by name. static const T* element(const TWordTypePrVec& set, const std::string& name) { @@ -299,6 +324,8 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { //! The container for leaves comprising distinct named //! (partition, person) field name pairs. TWordTypePrVec m_LeafSet; + + friend struct CHierarchicalResultsLevelSetTest::testMemoryUsage; }; template diff --git a/include/model/CHierarchicalResultsNormalizer.h b/include/model/CHierarchicalResultsNormalizer.h index b4fc0d1789..4bd249df05 100644 --- a/include/model/CHierarchicalResultsNormalizer.h +++ b/include/model/CHierarchicalResultsNormalizer.h @@ -12,15 +12,16 @@ #ifndef INCLUDED_ml_model_CHierarchicalResultsNormalizer_h #define INCLUDED_ml_model_CHierarchicalResultsNormalizer_h +#include #include #include #include +#include +#include #include -#include #include -#include #include namespace ml { @@ -44,6 +45,10 @@ struct MODEL_EXPORT SNormalizer { //! Compute a checksum for this object. uint64_t checksum() const; + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; + + std::size_t memoryUsage() const; + std::string s_Description; TNormalizerPtr s_Normalizer; }; @@ -84,6 +89,7 @@ struct MODEL_EXPORT SNormalizer { //! normalizers is negligible. 
class MODEL_EXPORT CHierarchicalResultsNormalizer : public CHierarchicalResultsLevelSet, + public CMonitoredResource, private core::CNonCopyable { public: using TBase = CHierarchicalResultsLevelSet; @@ -106,9 +112,10 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer enum ERestoreOutcome { E_Ok = 0, E_Corrupt = 1, E_Incomplete = 2 }; public: - CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig); + CHierarchicalResultsNormalizer(CLimits& limits, + const CAnomalyDetectorModelConfig& modelConfig); - ~CHierarchicalResultsNormalizer() override = default; + ~CHierarchicalResultsNormalizer() override; //! Add a job for the subsequent invocations of the normalizer. void setJob(EJob job); @@ -167,6 +174,19 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer const std::string& functionName, const std::string& valueFieldName) const; + //! Get the memory used by this hierarchical results normalizer. + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const override; + + //! Return the total memory usage. + std::size_t memoryUsage() const override; + + //! Get the static size of this object. + std::size_t staticSize() const override; + + //! Update the overall model size stats with information from the + //! hierarchical results normalizer. + void updateModelSizeStats(CResourceMonitor::SModelSizeStats& modelSizeStats) const override; + private: //! \brief Creates new normalizer instances. class CNormalizerFactory { @@ -210,15 +230,18 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer static std::string leafCue(const TWord& word); private: + //! Configurable limits + CLimits& m_Limits; + //! The jobs that the normalizer will perform when invoked //! can be: update, normalize or update + normalize. - EJob m_Job; + EJob m_Job{E_NoOp}; //! The model configuration file. const CAnomalyDetectorModelConfig& m_ModelConfig; //! Whether the last update of the quantiles has caused a big change. - bool m_HasLastUpdateCausedBigChange; + bool m_HasLastUpdateCausedBigChange{false}; }; } } diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 1c6375691d..5c7583888b 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -177,6 +177,9 @@ class MODEL_EXPORT CResourceMonitor { //! by calling this once per bucket processed until the initially requested memory limit is reached. void decreaseMargin(core_t::TTime elapsedTime); + //! Returns the sum of used memory plus any extra memory + std::size_t totalMemory() const; + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; @@ -218,9 +221,6 @@ class MODEL_EXPORT CResourceMonitor { //! Get the low memory limit with margin applied. std::size_t lowLimit() const; - //! Returns the sum of used memory plus any extra memory - std::size_t totalMemory() const; - //! Adjusts the amount of memory reported to take into //! account the current value of the byte limit margin and the effects //! of background persistence. 
diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index ad8242867a..da08829421 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -146,7 +146,7 @@ CAnomalyJob::CAnomalyJob(const std::string& jobId, m_MaxDetectors{std::numeric_limits::max()}, m_PersistenceManager{persistenceManager}, m_MaxQuantileInterval{maxQuantileInterval}, m_LastNormalizerPersistTime{core::CTimeUtils::now()}, m_LatestRecordTime{0}, - m_LastResultsTime{0}, m_Aggregator{modelConfig}, m_Normalizer{modelConfig} { + m_LastResultsTime{0}, m_Aggregator{modelConfig}, m_Normalizer{limits, modelConfig} { m_JsonOutputWriter.limitNumberRecords(maxAnomalyRecords); m_Limits.resourceMonitor().memoryUsageReporter(std::bind( @@ -1649,6 +1649,9 @@ void CAnomalyJob::pruneAllModels(std::size_t buckets) { (buckets == 0) ? detector->pruneModels() : detector->pruneModels(buckets); } } +const model::CHierarchicalResultsNormalizer& CAnomalyJob::normalizer() const { + return m_Normalizer; +} CAnomalyJob::TAnomalyDetectorPtr CAnomalyJob::makeDetector(const model::CAnomalyDetectorModelConfig& modelConfig, diff --git a/lib/api/CResultNormalizer.cc b/lib/api/CResultNormalizer.cc index 61dbae43cb..1df095d0d8 100644 --- a/lib/api/CResultNormalizer.cc +++ b/lib/api/CResultNormalizer.cc @@ -37,11 +37,12 @@ const std::string CResultNormalizer::INFLUENCER_LEVEL("infl"); const std::string CResultNormalizer::ZERO("0"); CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, - CSimpleOutputWriter& outputWriter) + CSimpleOutputWriter& outputWriter, + model::CLimits& limits) : m_ModelConfig(modelConfig), m_OutputWriter(outputWriter), m_WriteFieldNames(true), m_OutputFieldNormalizedScore(m_OutputFields[NORMALIZED_SCORE_NAME]), - m_Normalizer(m_ModelConfig) { + m_Normalizer(limits, m_ModelConfig) { } bool CResultNormalizer::initNormalizer(const std::string& stateFileName) { diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index 0de490e0e5..d5384327ef 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -1183,4 +1183,26 @@ BOOST_AUTO_TEST_CASE(testRestoreFromBadState) { } } +BOOST_AUTO_TEST_CASE(testHierarchicalResultsNormalizerShouldIncreaseMemoryUsage) { + model::CLimits limits; + auto jobConfig = CTestAnomalyJob::makeSimpleJobConfig("metric", "value", "", "", ""); + auto modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + std::stringstream outputStrm; + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + + CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob::TStrStrUMap const dataRows = { + {"time", "12345678"}, {"value", "1.0"}, {"greenhouse", "rhubarb"}}; + + BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + auto resourceMonitor = limits.resourceMonitor(); + resourceMonitor.forceRefreshAll(); + BOOST_TEST_REQUIRE(job.mutableNormalizer().memoryUsage() > 0); + + // Unregister the normalizer and check that memory usage decreases + auto memoryUsageBeforeUnregister = resourceMonitor.totalMemory(); + resourceMonitor.unRegisterComponent(job.mutableNormalizer()); + resourceMonitor.forceRefreshAll(); + BOOST_TEST_REQUIRE(resourceMonitor.totalMemory() < memoryUsageBeforeUnregister); +} BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index a88803e221..ba44163e7c 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc 
@@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1688,7 +1689,8 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); - ml::model::CHierarchicalResultsNormalizer normalizer(modelConfig); + ml::model::CLimits limits(false); + ml::model::CHierarchicalResultsNormalizer normalizer(limits, modelConfig); writer.persistNormalizer(normalizer, persistTime); writer.finalise(); } diff --git a/lib/api/unittest/CRestorePreviousStateTest.cc b/lib/api/unittest/CRestorePreviousStateTest.cc index 86bf30b2e2..7b06ec1e52 100644 --- a/lib/api/unittest/CRestorePreviousStateTest.cc +++ b/lib/api/unittest/CRestorePreviousStateTest.cc @@ -269,7 +269,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreNormalizer, ml::test::CProgramCounterClearing ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); ml::api::CCsvOutputWriter outputWriter; - ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); + ml::model::CLimits limits(false); + ml::api::CResultNormalizer normalizer(modelConfig, outputWriter, limits); BOOST_TEST_REQUIRE(normalizer.initNormalizer( "testfiles/state/" + version.s_Version + "/normalizer_state.json")); } diff --git a/lib/api/unittest/CResultNormalizerTest.cc b/lib/api/unittest/CResultNormalizerTest.cc index 316611364d..a3d08acf6d 100644 --- a/lib/api/unittest/CResultNormalizerTest.cc +++ b/lib/api/unittest/CResultNormalizerTest.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -31,7 +32,8 @@ BOOST_AUTO_TEST_CASE(testInitNormalizerPartitioned) { ml::api::CNdJsonOutputWriter outputWriter; - ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); + ml::model::CLimits limits(false); + ml::api::CResultNormalizer normalizer(modelConfig, outputWriter, limits); BOOST_TEST_REQUIRE(normalizer.initNormalizer("testfiles/new_quantilesState.json")); LOG_DEBUG(<< "normalizer initialized"); @@ -390,7 +392,8 @@ BOOST_AUTO_TEST_CASE(testInitNormalizer) { ml::api::CNdJsonOutputWriter outputWriter; - ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); + ml::model::CLimits limits(false); + ml::api::CResultNormalizer normalizer(modelConfig, outputWriter, limits); BOOST_TEST_REQUIRE(normalizer.initNormalizer("testfiles/quantilesState.json")); diff --git a/lib/api/unittest/CTestAnomalyJob.h b/lib/api/unittest/CTestAnomalyJob.h index 01187dfcdf..a643420f38 100644 --- a/lib/api/unittest/CTestAnomalyJob.h +++ b/lib/api/unittest/CTestAnomalyJob.h @@ -57,6 +57,10 @@ class CTestAnomalyJob : public ml::api::CAnomalyJob { const std::string& summaryCountFieldName = ""); static ml::api::CAnomalyJobConfig makeJobConfig(const std::string& detectorsConfig); + + ml::model::CHierarchicalResultsNormalizer& mutableNormalizer() const { + return const_cast(this->normalizer()); + } }; #endif // INCLUDED_CTestAnomalyJob_h diff --git a/lib/model/CHierarchicalResultsAggregator.cc b/lib/model/CHierarchicalResultsAggregator.cc index 383699dde3..34eb2a67bd 100644 --- a/lib/model/CHierarchicalResultsAggregator.cc +++ b/lib/model/CHierarchicalResultsAggregator.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc index ad0e249f87..3f83b6f81c 100644 --- a/lib/model/CHierarchicalResultsNormalizer.cc +++ b/lib/model/CHierarchicalResultsNormalizer.cc @@ -22,7 +22,6 @@ #include #include -#include 
#include namespace ml { @@ -38,7 +37,6 @@ const std::string INFLUENCER_CUE_PREFIX("infl"); const std::string PARTITION_CUE_PREFIX("part"); const std::string PERSON_CUE_PREFIX("per"); const std::string LEAF_CUE_PREFIX("leaf"); -const std::string EMPTY_STRING; } namespace hierarchical_results_normalizer_detail { @@ -59,12 +57,47 @@ std::uint64_t SNormalizer::checksum() const { std::uint64_t seed = maths::common::CChecksum::calculate(0, s_Description); return maths::common::CChecksum::calculate(seed, s_Normalizer); } + +void SNormalizer::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName("SNormalizer Memory Usage"); + core::memory_debug::dynamicSize("s_Description", s_Description, mem); + core::memory_debug::dynamicSize("s_Normalizer", s_Normalizer, mem); +} + +std::size_t SNormalizer::memoryUsage() const { + std::size_t mem = 0; + mem += core::memory::dynamicSize(s_Description); + mem += core::memory::dynamicSize(s_Normalizer); + return mem; +} +} + +void CHierarchicalResultsNormalizer::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName(" Hierarchical Results Normalizer Memory Usage"); + this->CHierarchicalResultsLevelSet::debugMemoryUsage(mem->addChild()); } -CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig) +std::size_t CHierarchicalResultsNormalizer::memoryUsage() const { + return this->CHierarchicalResultsLevelSet::memoryUsage(); +} +std::size_t CHierarchicalResultsNormalizer::staticSize() const { + return sizeof(*this); +} + +void CHierarchicalResultsNormalizer::updateModelSizeStats( + CResourceMonitor::SModelSizeStats& /*modelSizeStats*/) const { + // do nothing +} + +CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(CLimits& limits, + const CAnomalyDetectorModelConfig& modelConfig) : TBase(TNormalizer(std::string(), std::make_shared(modelConfig))), - m_Job(E_NoOp), m_ModelConfig(modelConfig), m_HasLastUpdateCausedBigChange(false) { + m_Limits(limits), m_ModelConfig(modelConfig) { + limits.resourceMonitor().registerComponent(*this); +} +CHierarchicalResultsNormalizer::~CHierarchicalResultsNormalizer() { + m_Limits.resourceMonitor().unRegisterComponent(*this); // NOSONAR } void CHierarchicalResultsNormalizer::setJob(EJob job) { diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc index b16640f2b6..b26b727185 100644 --- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc +++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc @@ -10,6 +10,7 @@ */ #include +#include #include #include @@ -26,6 +27,8 @@ struct STestNode { STestNode(const std::string& name) : s_Name(name) {} std::string print() const { return s_Name; } std::string s_Name; + + std::size_t memoryUsage() const { return sizeof(s_Name); } }; class CConcreteHierarchicalResultsLevelSet @@ -131,4 +134,29 @@ BOOST_AUTO_TEST_CASE(testElements) { } } +BOOST_AUTO_TEST_CASE(testMemoryUsage) { + CConcreteHierarchicalResultsLevelSet levelSet(STestNode("root")); + std::size_t memoryUsage = levelSet.memoryUsage(); + BOOST_REQUIRE(memoryUsage > 0); + + auto addAndCheckMemoryUsage = [&memoryUsage, &levelSet](auto& container, + const std::string& name) { + container.emplace_back(ml::core::CCompressedDictionary<1>::CWord(), + STestNode(name)); + std::size_t newMemoryUsage = levelSet.memoryUsage(); + BOOST_REQUIRE(newMemoryUsage > memoryUsage); + memoryUsage = newMemoryUsage; + }; + + 
+    addAndCheckMemoryUsage(levelSet.m_InfluencerBucketSet, "influencer bucket 1");
+    addAndCheckMemoryUsage(levelSet.m_InfluencerSet, "influencer 1");
+    addAndCheckMemoryUsage(levelSet.m_PartitionSet, "partition 1");
+    addAndCheckMemoryUsage(levelSet.m_PersonSet, "person 1");
+    addAndCheckMemoryUsage(levelSet.m_LeafSet, "leaf 1");
+
+    auto debugMemoryUsage = std::make_shared<ml::core::CMemoryUsage>();
+    levelSet.debugMemoryUsage(debugMemoryUsage);
+    BOOST_REQUIRE(debugMemoryUsage->usage() == memoryUsage);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc
index 91b4c57f9e..df332506c3 100644
--- a/lib/model/unittest/CHierarchicalResultsTest.cc
+++ b/lib/model/unittest/CHierarchicalResultsTest.cc
@@ -1533,7 +1533,8 @@ BOOST_AUTO_TEST_CASE(testNormalizer) {
         model::CAnomalyDetectorModelConfig::defaultConfig();
     model::CHierarchicalResultsAggregator aggregator(modelConfig);
     model::CHierarchicalResultsProbabilityFinalizer finalizer;
-    model::CHierarchicalResultsNormalizer normalizer(modelConfig);
+    model::CLimits l;
+    model::CHierarchicalResultsNormalizer normalizer(l, modelConfig);
     static const std::string FUNC("max");
     static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax);
@@ -1731,7 +1732,8 @@ BOOST_AUTO_TEST_CASE(testNormalizer) {
 
         LOG_DEBUG(<< "Compressed JSON doc is:\n" << origJson);
         {
-            model::CHierarchicalResultsNormalizer newNormalizerJson(modelConfig);
+            model::CLimits limits;
+            model::CHierarchicalResultsNormalizer newNormalizerJson(limits, modelConfig);
             std::stringstream stream(origJson);
             BOOST_REQUIRE_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok,
                                 newNormalizerJson.fromJsonStream(stream));
@@ -1761,7 +1763,8 @@ BOOST_AUTO_TEST_CASE(testNormalizer) {
         } while (filteredInput);
         LOG_DEBUG(<< "Uncompressed JSON doc is:\n" << uncompressedJson);
 
-        model::CHierarchicalResultsNormalizer newNormalizerJson(modelConfig);
+        model::CLimits limits;
+        model::CHierarchicalResultsNormalizer newNormalizerJson(limits, modelConfig);
         std::stringstream stream(uncompressedJson);
         BOOST_REQUIRE_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok,
                             newNormalizerJson.fromJsonStream(stream));
diff --git a/lib/model/unittest/CMakeLists.txt b/lib/model/unittest/CMakeLists.txt
index 47a28b792a..8e6d6dcf48 100644
--- a/lib/model/unittest/CMakeLists.txt
+++ b/lib/model/unittest/CMakeLists.txt
@@ -30,8 +30,8 @@ set (SRCS
     CForecastModelPersistTest.cc
     CFunctionTypesTest.cc
     CGathererToolsTest.cc
-    CHierarchicalResultsTest.cc
     CHierarchicalResultsLevelSetTest.cc
+    CHierarchicalResultsTest.cc
     CInterimBucketCorrectorTest.cc
     CLimitsTest.cc
     CLocalCategoryIdTest.cc

From b39d26123b76bd0b514c41465f627b341e9455e9 Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Thu, 27 Mar 2025 13:59:29 +1300
Subject: [PATCH 28/38] [8.19][ML] Better messaging regarding OOM process termination (#2841) (#2843)

This PR provides a more detailed message when a process is terminated
with SIGKILL.

On Linux, the OOM (Out Of Memory) handler will kill processes,
according to heuristics, when the OS runs low on memory. Our native
processes (apart from controller) are configured so that they are
chosen first for termination in such a situation.

The OOM handler terminates processes with SIGKILL (signal 9). SIGKILL
cannot be handled by the receiving process and results in immediate
termination, leaving no opportunity to log the situation. However, the
parent process - controller - can detect and report on the death of its
children.
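For illustration only, here is a minimal sketch of how a parent process
can make that distinction with POSIX waitpid; the helper name
reportChildDeath is hypothetical and this is not controller's actual
code:

```
#include <sys/types.h>
#include <sys/wait.h>

#include <csignal>
#include <cstdio>

// Hypothetical helper: reap one child and classify how it died.
void reportChildDeath(pid_t pid) {
    int status = 0;
    if (::waitpid(pid, &status, 0) != pid) {
        std::perror("waitpid");
        return;
    }
    if (WIFSIGNALED(status)) {
        int sig = WTERMSIG(status);
        if (sig == SIGKILL) {
            // SIGKILL cannot be caught or logged by the child itself, so
            // the parent is the only place this can be reported. On Linux
            // the OOM killer is the most likely sender.
            std::fprintf(stderr, "child %ld killed by SIGKILL (possible OOM)\n",
                         static_cast<long>(pid));
        } else {
            std::fprintf(stderr, "child %ld terminated by signal %d\n",
                         static_cast<long>(pid), sig);
        }
    } else if (WIFEXITED(status)) {
        std::fprintf(stderr, "child %ld exited with code %d\n",
                     static_cast<long>(pid), WEXITSTATUS(status));
    }
}
```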
Relates https://github.com/elastic/ml-team/issues/1158

Backports #2841
---
 docs/CHANGELOG.asciidoc             |  6 ++++++
 lib/core/CDetachedProcessSpawner.cc | 17 +++++++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 3de30494a0..a7ce57005a 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -35,6 +35,12 @@
 * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].)
 
+== {es} version 8.19.0
+
+=== Enhancements
+
+* Better messaging regarding OOM process termination. (See {ml-pull}2841[#2841].)
+
 == {es} version 8.18.0
 
 === Enhancements
diff --git a/lib/core/CDetachedProcessSpawner.cc b/lib/core/CDetachedProcessSpawner.cc
index d01031c64c..c7f9b1186e 100644
--- a/lib/core/CDetachedProcessSpawner.cc
+++ b/lib/core/CDetachedProcessSpawner.cc
@@ -185,13 +185,22 @@ class CTrackerThread : public CThread {
                     // at a lower level
                     LOG_INFO(<< "Child process with PID " << pid
                              << " was terminated by signal " << signal);
-                } else {
+                } else if (signal == SIGKILL) {
                     // This should never happen if the system is working
                     // normally - possible reasons are the Linux OOM
-                    // killer, manual intervention and bugs that cause
-                    // access violations
+                    // killer or manual intervention. The latter is highly unlikely
+                    // if running in the cloud.
+                    LOG_ERROR(<< "Child process with PID " << pid << " was terminated by signal 9 (SIGKILL)."
+                              << " This is likely due to the OOM killer."
+                              << " Please check system logs for more details.");
+                } else {
+                    // This should never happen if the system is working
+                    // normally - possible reasons are bugs that cause
+                    // access violations or manual intervention. The latter is highly unlikely
+                    // if running in the cloud.
                     LOG_ERROR(<< "Child process with PID " << pid
-                              << " was terminated by signal " << signal);
+                              << " was terminated by signal " << signal
+                              << ". Please check system logs for more details.");
                 }
             } else {
                 int exitCode = WEXITSTATUS(status);

From ba71f50f297fadb99fd4a8706803388e3959dd25 Mon Sep 17 00:00:00 2001
From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com>
Date: Fri, 2 May 2025 09:47:31 +0200
Subject: [PATCH 29/38] [8.19] [ML] Restrict file system access for pytorch models (#2851) (#2853)

* [ML] Restrict file system access for pytorch models (#2851)

This PR ensures that PyTorch models are not allowed to access the file
system. It accomplishes this by inspecting the model's operations and
refusing to load models containing operations that read or write files.
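As background for the approach, the standalone sketch below (hedged,
not part of this patch) lists every operator in a TorchScript model's
forward() graph using the same torch::jit calls the change relies on;
like the patch itself, it inspects only the top-level graph, not nested
sub-blocks:

```
#include <torch/script.h>

#include <iostream>
#include <set>
#include <string>

int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << "usage: " << argv[0] << " model.pt\n";
        return 1;
    }
    // Load the TorchScript model and walk the nodes of its forward()
    // graph, collecting each operator's qualified name.
    torch::jit::script::Module module_ = torch::jit::load(argv[1]);
    std::set<std::string> ops;
    const auto graph = module_.get_method("forward").graph();
    for (const auto& node : graph->nodes()) {
        ops.insert(node->kind().toQualString());
    }
    for (const auto& op : ops) {
        std::cout << op << '\n';
    }
    return 0;
}
```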
(cherry picked from commit fff4062ad42ca40d9cb765505c83f580b72b106b)

* rewrite for C++14 compatibility
---
 bin/pytorch_inference/Main.cc | 24 ++++++++++++++++++++++++
 docs/CHANGELOG.asciidoc       |  5 +++++
 2 files changed, 29 insertions(+)

diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
index 92db0aacec..98f303df4e 100644
--- a/bin/pytorch_inference/Main.cc
+++ b/bin/pytorch_inference/Main.cc
@@ -41,6 +41,29 @@
 #include
 #include
 
+namespace {
+// Add more forbidden ops here if needed
+const std::unordered_set<std::string> FORBIDDEN_OPERATIONS = {"aten::from_file", "aten::save"};
+
+void verifySafeModel(const torch::jit::script::Module& module_) {
+    try {
+        const auto method = module_.get_method("forward");
+        const auto graph = method.graph();
+        for (const auto& node : graph->nodes()) {
+            const std::string opName = node->kind().toQualString();
+            if (FORBIDDEN_OPERATIONS.find(opName) != FORBIDDEN_OPERATIONS.end()) {
+                HANDLE_FATAL(<< "Loading the inference process failed because it contains forbidden operation: "
+                             << opName);
+            }
+        }
+    } catch (const c10::Error& e) {
+        LOG_FATAL(<< "Failed to get forward method: " << e.what());
+    }
+
+    LOG_DEBUG(<< "Model verified: no forbidden operations detected.");
+}
+}
+
 torch::Tensor infer(torch::jit::script::Module& module_,
                     ml::torch::CCommandParser::SRequest& request) {
@@ -280,6 +303,7 @@ int main(int argc, char** argv) {
         return EXIT_FAILURE;
     }
     module_ = torch::jit::load(std::move(readAdapter));
+    verifySafeModel(module_);
     module_.eval();
     LOG_DEBUG(<< "model loaded");
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index a7ce57005a..a77ed44de5 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -49,6 +49,11 @@
 * Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780], {ml-pull}2779[#2779].)
 * Drop support for macOS Intel builds. (See {ml-pull}2795[#2795].)
 
+== {es} version 8.17.7
+
+=== Enhancements
+* Restrict file system access for PyTorch models. (See {ml-pull}2851[#2851].)
+
 == {es} version 8.16.6
 
 === Bug Fixes

From e058fa14c7422a375748121c4389d633d110e9cf Mon Sep 17 00:00:00 2001
From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com>
Date: Thu, 26 Jun 2025 09:11:34 +0200
Subject: [PATCH 30/38] [ML] Bump version to 8.19.1

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index 4e5c2aad61..f7dc45f9cd 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.0
+elasticsearchVersion=8.19.1
 
 artifactName=ml-cpp

From b81eec921bbb2f47cdc1fbc8aa5f09f9d86fb43d Mon Sep 17 00:00:00 2001
From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com>
Date: Thu, 26 Jun 2025 10:12:50 +0200
Subject: [PATCH 31/38] [ML] Revert premature bumping to 8.19.1

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index f7dc45f9cd..4e5c2aad61 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.1
+elasticsearchVersion=8.19.0
 
 artifactName=ml-cpp

From c2028ca912237bff83d2bf22686a8bd2e2bd4dbb Mon Sep 17 00:00:00 2001
From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com>
Date: Tue, 29 Jul 2025 10:48:00 +0200
Subject: [PATCH 32/38] [ML] Bump version to 8.19.1

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index 4e5c2aad61..f7dc45f9cd 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.0
+elasticsearchVersion=8.19.1
 
 artifactName=ml-cpp

From b839c4c74f91a9715f50b7d6d28cbe3290bc5c0f Mon Sep 17 00:00:00 2001
From: Pat Whelan
Date: Thu, 7 Aug 2025 09:01:36 -0400
Subject: [PATCH 33/38] [ML] Bump version to 8.19.2

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index f7dc45f9cd..95e277dd1b 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.1
+elasticsearchVersion=8.19.2
 
 artifactName=ml-cpp

From 207882bdb380031a3ede0eea05f4087639d4a639 Mon Sep 17 00:00:00 2001
From: Pat Whelan
Date: Tue, 12 Aug 2025 09:41:38 -0400
Subject: [PATCH 34/38] [ML] Bump version to 8.19.3

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index 95e277dd1b..cb1ef37b27 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.2
+elasticsearchVersion=8.19.3
 
 artifactName=ml-cpp

From 2bb9ad00234ea529ecd1a42c0403f0ed274f02a9 Mon Sep 17 00:00:00 2001
From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com>
Date: Thu, 28 Aug 2025 11:32:30 +0200
Subject: [PATCH 35/38] [ML] Bump version to 8.19.4

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index cb1ef37b27..0b6741755e 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.3
+elasticsearchVersion=8.19.4
 
 artifactName=ml-cpp

From 41e13efcbc40e9a9a7e02046bb2ccdaa4915e908 Mon Sep 17 00:00:00 2001
From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com>
Date: Thu, 18 Sep 2025 08:42:11 +0200
Subject: [PATCH 36/38] [ML] Bump version to 8.19.5

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gradle.properties b/gradle.properties
index 0b6741755e..25f50edf9a 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.4
+elasticsearchVersion=8.19.5
 
 artifactName=ml-cpp

From 6f2e179a7baadcba9a157febaafce58c0960acac Mon Sep 17 00:00:00 2001
From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com>
Date: Mon, 6 Oct 2025 14:16:50 +0200
Subject: [PATCH 37/38] [ML] Bump version to 8.19.6

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index 25f50edf9a..9a07b59ae1 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.5
+elasticsearchVersion=8.19.6
 
 artifactName=ml-cpp

From 9900c8fc93fc5eae3d64e1b2ff037796be5de070 Mon Sep 17 00:00:00 2001
From: Pat Whelan
Date: Thu, 23 Oct 2025 09:12:36 -0400
Subject: [PATCH 38/38] [ML] Bump version to 8.19.7

---
 gradle.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gradle.properties b/gradle.properties
index 9a07b59ae1..76bbd6229d 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,6 +1,6 @@
 org.gradle.daemon=false
 
-elasticsearchVersion=8.19.6
+elasticsearchVersion=8.19.7
 
 artifactName=ml-cpp