diff --git a/.buildkite/branch.json.py b/.buildkite/branch.json.py
index 3c279e3a93..374326ddd5 100755
--- a/.buildkite/branch.json.py
+++ b/.buildkite/branch.json.py
@@ -34,7 +34,7 @@ def main():
         build_windows = pipeline_steps.generate_step_template("Windows", "build", "", config.build_x86_64)
         pipeline_steps.append(build_windows)
     if config.build_macos:
-        build_macos = pipeline_steps.generate_step_template("MacOS", "build", config.build_aarch64, config.build_x86_64)
+        build_macos = pipeline_steps.generate_step_template("MacOS", "build", config.build_aarch64, "")
         pipeline_steps.append(build_macos)
     if config.build_linux:
         build_linux = pipeline_steps.generate_step_template("Linux", "build", config.build_aarch64, config.build_x86_64)
diff --git a/.buildkite/hooks/post-checkout b/.buildkite/hooks/post-checkout
index 7ed9a76c55..2833da5326 100644
--- a/.buildkite/hooks/post-checkout
+++ b/.buildkite/hooks/post-checkout
@@ -21,6 +21,8 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == ml-cpp* ]]; then
     export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/linux_aarch64 | awk '/^token/ {print $2;}')
   elif [[ "$BUILDKITE_STEP_KEY" == "build_test_macos-aarch64-RelWithDebInfo" ]]; then
     export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_aarch64 | awk '/^token/ {print $2;}')
+  elif [[ "$BUILDKITE_STEP_KEY" == "build_test_macos-x86_64-RelWithDebInfo" ]]; then
+    export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_x86_64 | awk '/^token/ {print $2;}')
   else [[ "$BUILDKITE_STEP_KEY" == "build_test_Windows-x86_64-RelWithDebInfo" ]]
     export BUILDKITE_ANALYTICS_TOKEN=$(vault read secret/ci/elastic-ml-cpp/buildkite/test_analytics/windows_x86_64 | awk '/^token/ {print $2;}')
   fi
diff --git a/.buildkite/job-build-test-all-debug.json.py b/.buildkite/job-build-test-all-debug.json.py
index 1ce77a61ae..e02e0908bf 100755
--- a/.buildkite/job-build-test-all-debug.json.py
+++ b/.buildkite/job-build-test-all-debug.json.py
@@ -44,7 +44,7 @@ def main():
         debug_windows = pipeline_steps.generate_step_template("Windows", "debug", "", config.build_x86_64)
         pipeline_steps.append(debug_windows)
     if config.build_macos:
-        debug_macos = pipeline_steps.generate_step_template("MacOS", "debug", config.build_aarch64, config.build_x86_64)
+        debug_macos = pipeline_steps.generate_step_template("MacOS", "debug", config.build_aarch64, "")
         pipeline_steps.append(debug_macos)
     if config.build_linux:
         debug_linux = pipeline_steps.generate_step_template("Linux", "debug", config.build_aarch64, config.build_x86_64)
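The hook above picks a Buildkite test-analytics token by matching the step key against a chain of `elif` branches. Read as data, it is a step-key-to-Vault-path table; a minimal sketch in Python (the `vault_path_for` helper is hypothetical, but the keys and paths are copied from the hook):

```python
# Sketch only: a table-driven reading of the elif chain in
# .buildkite/hooks/post-checkout. Step keys and Vault paths are taken
# from the hook; this helper itself is illustrative, not part of the change.
ANALYTICS_TOKEN_PATHS = {
    "build_test_linux-aarch64-RelWithDebInfo": "secret/ci/elastic-ml-cpp/buildkite/test_analytics/linux_aarch64",
    "build_test_macos-aarch64-RelWithDebInfo": "secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_aarch64",
    "build_test_macos-x86_64-RelWithDebInfo": "secret/ci/elastic-ml-cpp/buildkite/test_analytics/macos_x86_64",
    "build_test_Windows-x86_64-RelWithDebInfo": "secret/ci/elastic-ml-cpp/buildkite/test_analytics/windows_x86_64",
}

def vault_path_for(step_key):
    """Return the Vault secret path holding the analytics token, if any."""
    return ANALYTICS_TOKEN_PATHS.get(step_key)
```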
diff --git a/.buildkite/ml_pipeline/config.py b/.buildkite/ml_pipeline/config.py
index 363d6826ef..4669ce8b58 100644
--- a/.buildkite/ml_pipeline/config.py
+++ b/.buildkite/ml_pipeline/config.py
@@ -40,6 +40,19 @@ def parse_comment(self):
         if self.run_pytorch_tests or self.run_qa_tests:
             self.action = "build"
 
+        # If the ACTION is set to "run_qa_tests" then set some optional variables governing the ES branch to build,
+        # the stack version to set and the subset of QA tests to run, depending on whether appropriate variables
+        # are set in the environment.
+        if self.run_qa_tests:
+            if "GITHUB_PR_COMMENT_VAR_BRANCH" in os.environ:
+                os.environ["ES_BRANCH"] = os.environ["GITHUB_PR_COMMENT_VAR_BRANCH"]
+
+            if "GITHUB_PR_COMMENT_VAR_VERSION" in os.environ:
+                os.environ["STACK_VERSION"] = os.environ["GITHUB_PR_COMMENT_VAR_VERSION"]
+
+            if "GITHUB_PR_COMMENT_VAR_ARGS" in os.environ:
+                os.environ["QAF_TESTS_TO_RUN"] = os.environ["GITHUB_PR_COMMENT_VAR_ARGS"]
+
         # If the GITHUB_PR_COMMENT_VAR_ARCH environment variable is set then attempt to parse it
         # into comma separated values. If the values are one or both of "aarch64" or "x86_64" then set the member
         # variables self.build_aarch64, self.build_x86_64 accordingly. These values will be used to restrict the build
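The new `parse_comment` branch forwards values captured from the PR trigger comment to the variables the QA pipeline reads. The mapping, isolated as a runnable sketch (the function name is hypothetical; the variable pairs are exactly those added above):

```python
# Sketch only: the comment-variable to pipeline-variable mapping from the
# parse_comment() additions above, pulled out for illustration.
import os

COMMENT_VAR_TO_ENV = {
    "GITHUB_PR_COMMENT_VAR_BRANCH": "ES_BRANCH",
    "GITHUB_PR_COMMENT_VAR_VERSION": "STACK_VERSION",
    "GITHUB_PR_COMMENT_VAR_ARGS": "QAF_TESTS_TO_RUN",
}

def propagate_comment_vars(environ=os.environ):
    """Copy comment-supplied values into the variables the QA job consumes."""
    for src, dst in COMMENT_VAR_TO_ENV.items():
        if src in environ:
            environ[dst] = environ[src]
```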
action='https://github.com/store_true', - default=False, - help="Build for x86_64?") args = parser.parse_args() diff --git a/.buildkite/pipelines/create_dra.yml.sh b/.buildkite/pipelines/create_dra.yml.sh index 6a4b87f547..5873caf3f1 100755 --- a/.buildkite/pipelines/create_dra.yml.sh +++ b/.buildkite/pipelines/create_dra.yml.sh @@ -17,7 +17,6 @@ steps: depends_on: - "build_test_linux-aarch64-RelWithDebInfo" - "build_test_linux-x86_64-RelWithDebInfo" - - "build_test_macos-x86_64-RelWithDebInfo" - "build_test_macos-aarch64-RelWithDebInfo" - "build_test_Windows-x86_64-RelWithDebInfo" diff --git a/.buildkite/pipelines/run_qa_tests.yml.sh b/.buildkite/pipelines/run_qa_tests.yml.sh index 5534588ea3..5cff0aed85 100755 --- a/.buildkite/pipelines/run_qa_tests.yml.sh +++ b/.buildkite/pipelines/run_qa_tests.yml.sh @@ -24,5 +24,17 @@ steps: build: message: "${BUILDKITE_MESSAGE}" env: - QAF_TESTS_TO_RUN: "ml_cpp_pr" + QAF_TESTS_TO_RUN: "${QAF_TESTS_TO_RUN:-ml_cpp_pr}" EOL + +if [ "${ES_BRANCH}" != "" ]; then +cat <build|debug|run_qa_tests|run_pytorch_tests) *(?: *on *(?(?:[ ,]*(?:windows|linux|mac(os)?))+))?) *(?(?:[ ,]*aarch64|x86_64)+)?$", + "trigger_comment_regex": "^(?:(?:buildkite +)(?build|debug|run_qa_tests|run_pytorch_tests)(=(?(?:[^ ]+)))? *(?: for ES_BRANCH=(?([.0-9a-zA-Z]+)))? *(?:with STACK_VERSION=(?([.0-9]+)))? *(?: *on *(?(?:[ ,]*(?:windows|linux|mac(os)?))+))?) *(?(?:[, ]*aarch64|x86_64)+)?$", "always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))", "skip_ci_labels": ["skip-ci", "jenkins-ci", ">test-mute", ">docs"], "skip_target_branches": ["6.8", "7.11", "7.12"], diff --git a/.buildkite/scripts/steps/build_and_test.sh b/.buildkite/scripts/steps/build_and_test.sh index af1c912052..b35aaa9ed8 100755 --- a/.buildkite/scripts/steps/build_and_test.sh +++ b/.buildkite/scripts/steps/build_and_test.sh @@ -90,29 +90,11 @@ else # Darwin (macOS) else TASKS="clean buildZip buildZipSymbols check" fi - # For macOS we usually only use a particular version as our build platform - # once Xcode has stopped receiving updates for it. However, with Big Sur - # on ARM we couldn't do this, as Big Sur was the first macOS version for - # ARM. Therefore, the compiler may get upgraded on a CI server, and we - # need to hardcode the version that was used to build Boost for that - # version of Elasticsearch. - if [ "$HARDWARE_ARCH" = aarch64 ] ; then - export BOOSTCLANGVER=13 - fi (cd ${REPO_ROOT} && ./gradlew --info -Dbuild.version_qualifier=${VERSION_QUALIFIER:-} -Dbuild.snapshot=$BUILD_SNAPSHOT -Dbuild.ml_debug=$ML_DEBUG $TASKS) || TEST_OUTCOME=$? else # Darwin x86_64 - # For macOS x86_64 we re-use existing Docker scripts and build directly on the machine - function nproc() { - sysctl -n hw.logicalcpu - } - export -f nproc - if [ "$RUN_TESTS" = "true" ]; then - ${REPO_ROOT}/dev-tools/docker/docker_entrypoint.sh --test - grep passed build/test_status.txt || TEST_OUTCOME=$? 
diff --git a/.buildkite/scripts/steps/build_and_test.sh b/.buildkite/scripts/steps/build_and_test.sh
index af1c912052..b35aaa9ed8 100755
--- a/.buildkite/scripts/steps/build_and_test.sh
+++ b/.buildkite/scripts/steps/build_and_test.sh
@@ -90,29 +90,11 @@ else # Darwin (macOS)
         else
             TASKS="clean buildZip buildZipSymbols check"
         fi
-        # For macOS we usually only use a particular version as our build platform
-        # once Xcode has stopped receiving updates for it. However, with Big Sur
-        # on ARM we couldn't do this, as Big Sur was the first macOS version for
-        # ARM. Therefore, the compiler may get upgraded on a CI server, and we
-        # need to hardcode the version that was used to build Boost for that
-        # version of Elasticsearch.
-        if [ "$HARDWARE_ARCH" = aarch64 ] ; then
-            export BOOSTCLANGVER=13
-        fi
         (cd ${REPO_ROOT} && ./gradlew --info -Dbuild.version_qualifier=${VERSION_QUALIFIER:-} -Dbuild.snapshot=$BUILD_SNAPSHOT -Dbuild.ml_debug=$ML_DEBUG $TASKS) || TEST_OUTCOME=$?
     else # Darwin x86_64
-        # For macOS x86_64 we re-use existing Docker scripts and build directly on the machine
-        function nproc() {
-            sysctl -n hw.logicalcpu
-        }
-        export -f nproc
-        if [ "$RUN_TESTS" = "true" ]; then
-            ${REPO_ROOT}/dev-tools/docker/docker_entrypoint.sh --test
-            grep passed build/test_status.txt || TEST_OUTCOME=$?
-        else
-            ${REPO_ROOT}/dev-tools/docker/docker_entrypoint.sh
-        fi
+        echo "Unsupported architecture - macOS x86_64"
+        exit 1
     fi
 fi
diff --git a/.ci/orka/README.md b/.ci/orka/README.md
index 91978c43c5..7d55be17c6 100644
--- a/.ci/orka/README.md
+++ b/.ci/orka/README.md
@@ -19,10 +19,6 @@ If you haven't run these before, run the following once so packer downloads the
 ```
 packer init orka-macos-12-arm.pkr.hcl
 ```
-or
-```
-packer init orka-macos-12-x86_64.pkr.hcl
-```
 
 ## Build
 
@@ -46,7 +42,6 @@ The source images used for the MacOS builds are slightly modified copies of the
 The source images are named:
 
 * `ml-macos-12-base-arm-fundamental.orkasi`
-* `ml-macos-12-base-x86_64-fundamental.img`
 
 The source image only has the following changes on it:
 * Adding passwordless `sudo` for the default `admin` user
@@ -70,7 +65,4 @@ The packer script does the following:
 
 ## Caveats
 
-* Prior to the dependency on PyTorch 2.3.1 we only needed Orka for ARM builds (CI and dependencies), x86_64 builds were
-  performed via cross-compilation. However, PyTorch 2.3.1 now requires a more modern version of `clang` that our cross
-  compilation framework provided. As a suitable Orka base image is available for x86_64, it is now simpler to compile
-  natively for that architecture.
+* As of version 8.18, support for macOS x86_64 builds has been dropped. To maintain the x86_64 Orka VMs, check out and work on earlier release branches.
diff --git a/3rd_party/3rd_party.cmake b/3rd_party/3rd_party.cmake
index dd0ee72928..0abe84e850 100644
--- a/3rd_party/3rd_party.cmake
+++ b/3rd_party/3rd_party.cmake
@@ -49,7 +49,7 @@ if ("${HOST_SYSTEM_NAME}" STREQUAL "darwin")
   else()
     set(BOOST_ARCH "a64")
   endif()
-  set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_83.dylib")
+  set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_86.dylib")
   set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework")
   set(XML_LOCATION)
   set(GCC_RT_LOCATION)
@@ -73,7 +73,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux")
     set(MKL_PREFIX "libmkl_")
     set(MKL_LIBRARIES "avx2" "avx512" "core" "def" "gnu_thread" "intel_lp64" "mc3" "vml_avx2" "vml_avx512" "vml_cmpt" "vml_def" "vml_mc3")
   endif()
-  set(BOOST_EXTENSION mt-${BOOST_ARCH}-1_83.so.1.83.0)
+  set(BOOST_EXTENSION mt-${BOOST_ARCH}-1_86.so.1.86.0)
   set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework")
   set(XML_LOCATION "/usr/local/gcc103/lib")
   set(XML_EXTENSION ".so.2")
@@ -94,7 +94,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux")
   set(SYSROOT "/usr/local/sysroot-x86_64-apple-macosx10.14")
   set(BOOST_LOCATION "${SYSROOT}/usr/local/lib")
   set(BOOST_COMPILER "clang")
-  set(BOOST_EXTENSION "mt-x64-1_83.dylib")
+  set(BOOST_EXTENSION "mt-x64-1_86.dylib")
   set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework")
   set(XML_LOCATION)
   set(GCC_RT_LOCATION)
@@ -115,7 +115,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux")
     message(FATAL_ERROR "Cannot cross compile to $ENV{CPP_CROSS_COMPILE}")
     return()
   endif()
-  set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_83.so.1.83.0")
+  set(BOOST_EXTENSION "mt-${BOOST_ARCH}-1_86.so.1.86.0")
   set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework")
   set(XML_LOCATION "${SYSROOT}/usr/local/gcc103/lib")
   set(XML_EXTENSION ".so.2")
@@ -142,7 +142,7 @@ else()
   # server is currently set up
   set(BOOST_LOCATION "${LOCAL_DRIVE}/usr/local/lib")
   set(BOOST_COMPILER "vc")
-  set(BOOST_EXTENSION "mt-x64-1_83.dll")
+  set(BOOST_EXTENSION "mt-x64-1_86.dll")
   set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework")
   set(XML_LOCATION "${LOCAL_DRIVE}/usr/local/bin")
   set(XML_EXTENSION ".dll")
diff --git a/3rd_party/licenses/boost-INFO.csv b/3rd_party/licenses/boost-INFO.csv
index c30b721cb4..9a9a86db27 100644
--- a/3rd_party/licenses/boost-INFO.csv
+++ b/3rd_party/licenses/boost-INFO.csv
@@ -1,2 +1,2 @@
 name,version,revision,url,license,copyright,sourceURL
-Boost C++ Libraries,1.83.0,,http://www.boost.org,BSL-1.0,,
+Boost C++ Libraries,1.86.0,,http://www.boost.org,BSL-1.0,,
diff --git a/3rd_party/licenses/pytorch-INFO.csv b/3rd_party/licenses/pytorch-INFO.csv
index 40806db5f4..6750bd1224 100644
--- a/3rd_party/licenses/pytorch-INFO.csv
+++ b/3rd_party/licenses/pytorch-INFO.csv
@@ -1,2 +1,2 @@
 name,version,revision,url,license,copyright,sourceURL
-PyTorch,2.3.1,63d5e9221bedd1546b7d364b5ce4171547db12a9,https://pytorch.org,BSD-3-Clause,,
+PyTorch,2.5.1,a8d6afb511a69687bbb2b7e88a3cf67917e1697e,https://pytorch.org,BSD-3-Clause,,
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea9df4cec3..0aeb7a04fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,26 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
   install(FILES ${CMAKE_BINARY_DIR}/Info.plist DESTINATION ${CMAKE_INSTALL_PREFIX})
 endif()
 
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  if(NOT LINK_TCMALLOC)
+    set(LINK_TCMALLOC FALSE)
+  endif()
+  if(NOT LINK_PROFILER)
+    set(LINK_PROFILER FALSE)
+  endif()
+else()
+  if(LINK_TCMALLOC)
+    message(WARNING "Not linking libtcmalloc on ${CMAKE_SYSTEM_NAME}")
+    set(LINK_TCMALLOC FALSE)
+    unset(LINK_TCMALLOC CACHE)
+  endif()
+  if(LINK_PROFILER)
+    message(WARNING "Not linking libprofiler on ${CMAKE_SYSTEM_NAME}")
+    set(LINK_PROFILER FALSE)
+    unset(LINK_PROFILER CACHE)
+  endif()
+endif()
+
 message(STATUS "CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}")
 
 include_directories(SYSTEM ${ML_SYSTEM_INCLUDE_DIRECTORIES})
@@ -87,3 +107,11 @@ add_subdirectory(devlib)
 # Add a target to build Doxygen generated documentation
 # if the doxygen executable can be found
 ml_doxygen(${CMAKE_SOURCE_DIR}/build/doxygen)
+
+if (LINK_TCMALLOC)
+  unset(LINK_TCMALLOC CACHE)
+endif()
+
+if (LINK_PROFILER)
+  unset(LINK_PROFILER CACHE)
+endif()
diff --git a/bin/autodetect/CMakeLists.txt b/bin/autodetect/CMakeLists.txt
index 970095c0b7..93af5e9201 100644
--- a/bin/autodetect/CMakeLists.txt
+++ b/bin/autodetect/CMakeLists.txt
@@ -22,6 +22,16 @@ set(ML_LINK_LIBRARIES
     MlVer
     )
 
+if (LINK_TCMALLOC)
+  message(AUTHOR_WARNING "Linking libtcmalloc. Build is not for production release.")
+  list(APPEND ML_LINK_LIBRARIES tcmalloc)
+endif ()
+
+if (LINK_PROFILER)
+  message(AUTHOR_WARNING "Linking libprofiler. Build is not for production release.")
+  list(APPEND ML_LINK_LIBRARIES profiler)
+endif ()
+
 ml_add_executable(autodetect
     CCmdLineParser.cc
     )
diff --git a/bin/normalize/Main.cc b/bin/normalize/Main.cc
index f8cbe36501..9aa8e94895 100644
--- a/bin/normalize/Main.cc
+++ b/bin/normalize/Main.cc
@@ -27,6 +27,7 @@
 #include
 
+#include <model/CLimits.h>
 #include
 #include
 #include
@@ -144,8 +145,12 @@ int main(int argc, char** argv) {
                                       ioMgr.outputStream());
     }()};
 
+    // Initialize memory limits with default values.
+    // This is fine as the normalizer doesn't use the memory limit.
+    ml::model::CLimits limits{false};
+
     // This object will do the work
-    ml::api::CResultNormalizer normalizer{modelConfig, *outputWriter};
+    ml::api::CResultNormalizer normalizer{modelConfig, *outputWriter, limits};
 
     // Restore state
     if (!quantilesStateFile.empty()) {
diff --git a/bin/pytorch_inference/CMakeLists.txt b/bin/pytorch_inference/CMakeLists.txt
index 62a4f3defd..5c6ff63528 100644
--- a/bin/pytorch_inference/CMakeLists.txt
+++ b/bin/pytorch_inference/CMakeLists.txt
@@ -21,6 +21,16 @@ set(ML_LINK_LIBRARIES
     ${C10_LIB}
     )
 
+if (LINK_TCMALLOC)
+  message(AUTHOR_WARNING "Linking libtcmalloc. Build is not for production release.")
+  list(APPEND ML_LINK_LIBRARIES tcmalloc)
+endif ()
+
+if (LINK_PROFILER)
+  message(AUTHOR_WARNING "Linking libprofiler. Build is not for production release.")
+  list(APPEND ML_LINK_LIBRARIES profiler)
+endif ()
+
 ml_add_executable(pytorch_inference
     CBufferedIStreamAdapter.cc
     CCmdLineParser.cc
diff --git a/bin/pytorch_inference/CResultWriter.cc b/bin/pytorch_inference/CResultWriter.cc
index b4ca0baeb0..34389dad44 100644
--- a/bin/pytorch_inference/CResultWriter.cc
+++ b/bin/pytorch_inference/CResultWriter.cc
@@ -136,6 +136,9 @@ std::string CResultWriter::createInnerResult(const ::torch::Tensor& results) {
         case 2:
             this->writePrediction<2>(results, jsonWriter);
             break;
+        case 1:
+            this->writePrediction<1>(results, jsonWriter);
+            break;
         default: {
             std::ostringstream ss;
             ss << "Cannot convert results tensor of size [" << sizes << ']';
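The new `case 1` above routes rank-1 tensors through `writePrediction<1>`, which (see `CResultWriter.h` below) wraps the values in two extra arrays so the reader always receives a 3-D `inference` field. A plain-Python stand-in for that shape convention, not the C++ writer itself:

```python
# Sketch only: the nesting convention behind the 1-D result handling.
def wrap_1d_result(values):
    """Wrap a 1-D list of predictions in the 3-D layout the Java side expects."""
    return {"result": {"inference": [[list(values)]]}}

# Matches the expected output in CResultWriterTest further below.
assert wrap_1d_result([1]) == {"result": {"inference": [[[1]]]}}
```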
diff --git a/bin/pytorch_inference/CResultWriter.h b/bin/pytorch_inference/CResultWriter.h
index 037a2769f5..8d809dc9df 100644
--- a/bin/pytorch_inference/CResultWriter.h
+++ b/bin/pytorch_inference/CResultWriter.h
@@ -191,6 +191,24 @@ class CResultWriter : public TStringBufWriter {
         jsonWriter.onObjectEnd();
     }
 
+    //! Write a 1D inference result
+    template<typename T>
+    void writeInferenceResults(const ::torch::TensorAccessor<T, 1UL>& accessor,
+                               TStringBufWriter& jsonWriter) {
+
+        jsonWriter.onKey(RESULT);
+        jsonWriter.onObjectBegin();
+        jsonWriter.onKey(INFERENCE);
+        // The Java side requires a 3D array, so wrap the 1D result in an
+        // extra outer array twice.
+        jsonWriter.onArrayBegin();
+        jsonWriter.onArrayBegin();
+        this->writeTensor(accessor, jsonWriter);
+        jsonWriter.onArrayEnd();
+        jsonWriter.onArrayEnd();
+        jsonWriter.onObjectEnd();
+    }
+
 private:
     core::CJsonOutputStreamWrapper m_WrappedOutputStream;
 };
diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
index 662810a48c..98f303df4e 100644
--- a/bin/pytorch_inference/Main.cc
+++ b/bin/pytorch_inference/Main.cc
@@ -41,6 +41,29 @@
 #include
 #include
 
+namespace {
+// Add more forbidden ops here if needed
+const std::unordered_set<std::string> FORBIDDEN_OPERATIONS = {"aten::from_file", "aten::save"};
+
+void verifySafeModel(const torch::jit::script::Module& module_) {
+    try {
+        const auto method = module_.get_method("forward");
+        const auto graph = method.graph();
+        for (const auto& node : graph->nodes()) {
+            const std::string opName = node->kind().toQualString();
+            if (FORBIDDEN_OPERATIONS.find(opName) != FORBIDDEN_OPERATIONS.end()) {
+                HANDLE_FATAL(<< "Loading the inference process failed because it contains forbidden operation: "
+                             << opName);
+            }
+        }
+    } catch (const c10::Error& e) {
+        LOG_FATAL(<< "Failed to get forward method: " << e.what());
+    }
+
+    LOG_DEBUG(<< "Model verified: no forbidden operations detected.");
+}
+}
+
 torch::Tensor infer(torch::jit::script::Module& module_,
                     ml::torch::CCommandParser::SRequest& request) {
 
@@ -73,7 +96,12 @@ torch::Tensor infer(torch::jit::script::Module& module_,
             // For transformers the result tensor is the first element in a tuple.
             all.push_back(output.toTuple()->elements()[0].toTensor());
         } else {
-            all.push_back(output.toTensor());
+            auto outputTensor = output.toTensor();
+            if (outputTensor.dim() == 0) { // If the output is a scalar, reshape it into a 1x1 tensor
+                all.push_back(outputTensor.reshape({1, 1}));
+            } else {
+                all.push_back(std::move(outputTensor));
+            }
         }
 
         inputs.clear();
@@ -275,6 +303,7 @@ int main(int argc, char** argv) {
             return EXIT_FAILURE;
         }
         module_ = torch::jit::load(std::move(readAdapter));
+        verifySafeModel(module_);
         module_.eval();
         LOG_DEBUG(<< "model loaded");
diff --git a/bin/pytorch_inference/unittest/CResultWriterTest.cc b/bin/pytorch_inference/unittest/CResultWriterTest.cc
index 99333db8c3..97b99038a2 100644
--- a/bin/pytorch_inference/unittest/CResultWriterTest.cc
+++ b/bin/pytorch_inference/unittest/CResultWriterTest.cc
@@ -80,6 +80,18 @@ BOOST_AUTO_TEST_CASE(testCreateInnerInferenceResult) {
     BOOST_REQUIRE_EQUAL(expected, innerPortion);
 }
 
+BOOST_AUTO_TEST_CASE(testCreateInnerInferenceResultFor1DimensionalResult) {
+    std::ostringstream output;
+    ml::torch::CResultWriter resultWriter{output};
+    ::torch::Tensor tensor{::torch::ones({1})};
+    std::string innerPortion{resultWriter.createInnerResult(tensor)};
+    std::string expected = "\"result\":{\"inference\":"
+                           "[[[1]]]}";
+    LOG_INFO(<< "expected: " << expected);
+    LOG_INFO(<< "actual: " << innerPortion);
+    BOOST_REQUIRE_EQUAL(expected, innerPortion);
+}
+
 BOOST_AUTO_TEST_CASE(testWrapAndWriteInferenceResult) {
     std::string innerPortion{
         "\"result\":{\"inference\":"
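`verifySafeModel` walks the TorchScript graph of `forward` and aborts on any blocklisted operator. The same check can be prototyped against a saved model with the Python TorchScript API; a sketch under the assumption that the model loads with `torch.jit.load` (the op list mirrors `FORBIDDEN_OPERATIONS` above, the function name is illustrative):

```python
# Sketch only: Python counterpart of verifySafeModel() for local experiments.
import torch

FORBIDDEN_OPERATIONS = {"aten::from_file", "aten::save"}

def verify_safe_model(path):
    module = torch.jit.load(path)
    # module.graph is the graph of the forward() method.
    for node in module.graph.nodes():
        if node.kind() in FORBIDDEN_OPERATIONS:
            raise ValueError(f"forbidden operation in model: {node.kind()}")
```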
diff --git a/build-setup/linux.md b/build-setup/linux.md
index 1017c1eb94..adcc5d1a0f 100644
--- a/build-setup/linux.md
+++ b/build-setup/linux.md
@@ -171,17 +171,17 @@ sudo make install
 
 to install.
 
-### Boost 1.83.0
+### Boost 1.86.0
 
-Download version 1.83.0 of Boost from <http://www.boost.org/>. You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from <http://www.boost.org/>. You must get this exact version, as the Machine Learning build system requires it.
 
 Assuming you chose the `.bz2` version, extract it to a temporary directory:
 
 ```
-bzip2 -cd boost_1_83_0.tar.bz2 | tar xvf -
+bzip2 -cd boost_1_86_0.tar.bz2 | tar xvf -
 ```
 
-In the resulting `boost_1_83_0` directory, run:
+In the resulting `boost_1_86_0` directory, run:
 
 ```
 ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu
@@ -189,16 +189,13 @@
 
 This should build the `b2` program, which in turn is used to build Boost.
 
-Edit `boost/unordered/detail/prime_fmod.hpp` and change line 134 from:
-
+Edit `boost/unordered/detail/prime_fmod.hpp` and change line 37 from
 ```
-    (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
+    constexpr static std::size_t const sizes[] = {13ul, 29ul, 53ul, 97ul,
 ```
-
 to:
-
 ```
-    (3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
+    constexpr static std::size_t const sizes[] = {3ul, 13ul, 29ul, 53ul, 97ul,
 ```
 
 Finally, run:
@@ -332,7 +329,7 @@ Then copy the shared libraries to the system directory:
 (cd /opt/intel/oneapi/mkl/2024.0 && tar cf - lib) | (cd /usr/local/gcc103 && sudo tar xvf -)
 ```
 
-### PyTorch 2.3.1
+### PyTorch 2.5.1
 
 (This step requires a reasonable amount of memory. It failed on a machine with 8GB of RAM. It succeeded on a 16GB machine. You can specify the number of parallel jobs using environment variable MAX_JOBS. Lower number of jobs will reduce memory usage.)
 
@@ -351,7 +348,7 @@ sudo /usr/local/gcc103/bin/python3.10 -m pip install numpy pyyaml setuptools
 
 Then obtain the PyTorch code:
 
 ```
-git clone --depth=1 --branch=v2.3.1 git@github.com:pytorch/pytorch.git
+git clone --depth=1 --branch=v2.5.1 git@github.com:pytorch/pytorch.git
 cd pytorch
 git submodule sync
 git submodule update --init --recursive
@@ -365,6 +362,13 @@
 a heuristic virus scanner looking for potentially dangerous function calls in
 our shipped product will not encounter these functions that run external
 processes.
 
+Edit the file `./third_party/onnx/CMakeLists.txt` and insert the line
+```
+set(PYTHON_EXECUTABLE "/usr/local/bin/python3.10")
+```
+before line 104. This sets the `PYTHON_EXECUTABLE` variable to the specified
+Python executable path in the ONNX CMake configuration.
+
 Build as follows:
 
 ```
@@ -379,7 +383,7 @@
 export USE_MKLDNN=ON
 export USE_QNNPACK=OFF
 export USE_PYTORCH_QNNPACK=OFF
 [ $(uname -m) = x86_64 ] && export USE_XNNPACK=OFF
-export PYTORCH_BUILD_VERSION=2.3.1
+export PYTORCH_BUILD_VERSION=2.5.1
 export PYTORCH_BUILD_NUMBER=1
 /usr/local/gcc103/bin/python3.10 setup.py install
 ```
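The `prime_fmod.hpp` edit above is the manual counterpart of the `sed -i -e 's/{13ul/{3ul, 13ul/'` one-liner that the Docker images later in this change apply automatically. A scripted version of the manual step, assuming it is run from the `boost_1_86_0` directory:

```python
# Sketch only: prepend 3ul to the prime size table, as the build-setup
# docs describe; equivalent to the sed command used in the Dockerfiles.
from pathlib import Path

hpp = Path("boost/unordered/detail/prime_fmod.hpp")
text = hpp.read_text()
assert "{13ul" in text, "prime size table not found; check the Boost version"
hpp.write_text(text.replace("{13ul", "{3ul, 13ul", 1))
```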
diff --git a/build-setup/macos.md b/build-setup/macos.md
index 9d3719384f..6f6b2afaeb 100644
--- a/build-setup/macos.md
+++ b/build-setup/macos.md
@@ -66,17 +66,17 @@ xcode-select --install
 
 at the command prompt.
 
-### Boost 1.83.0
+### Boost 1.86.0
 
-Download version 1.83.0 of Boost from <http://www.boost.org/>. You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from <http://www.boost.org/>. You must get this exact version, as the Machine Learning build system requires it.
 
 Assuming you chose the `.bz2` version, extract it to a temporary directory:
 
 ```
-bzip2 -cd boost_1_83_0.tar.bz2 | tar xvf -
+bzip2 -cd boost_1_86_0.tar.bz2 | tar xvf -
 ```
 
-In the resulting `boost_1_83_0` directory, run:
+In the resulting `boost_1_86_0` directory, run:
 
 ```
 ./bootstrap.sh --with-toolset=clang --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu
@@ -84,16 +84,16 @@
 
 This should build the `b2` program, which in turn is used to build Boost.
 
-Edit `boost/unordered/detail/prime_fmod.hpp` and change line 134 from
+Edit `boost/unordered/detail/prime_fmod.hpp` and change line 37 from
 
 ```
-    (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
+    constexpr static std::size_t const sizes[] = {13ul, 29ul, 53ul, 97ul,
 ```
 
 to:
 
 ```
-    (3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
+    constexpr static std::size_t const sizes[] = {3ul, 13ul, 29ul, 53ul, 97ul,
 ```
 
@@ -101,7 +101,7 @@ To complete the build, type:
 
 ```
 ./b2 -j8 --layout=versioned --disable-icu cxxflags="-std=c++17 -stdlib=libc++ $SSEFLAGS" linkflags="-std=c++17 -stdlib=libc++ -Wl,-headerpad_max_install_names" optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC
-sudo ./b2 install --layout=versioned --disable-icu cxxflags="-std=c++17 -stdlib=libc++ $SSEFLAGS" linkflags="-std=c++17 -stdlib=libc++ -Wl,-headerpad_max_install_names" optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC
+sudo ./b2 -j8 install --layout=versioned --disable-icu cxxflags="-std=c++17 -stdlib=libc++ $SSEFLAGS" linkflags="-std=c++17 -stdlib=libc++ -Wl,-headerpad_max_install_names" optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC
 ```
 
 to install the Boost headers and libraries.
@@ -127,7 +127,7 @@
 Download the graphical installer for Python 3.10.9 from <https://www.python.org/ftp/python/3.10.9/python-3.10.9-macos11.pkg> and install:
diff --git a/build-setup/macos_cross_compiled.md b/build-setup/macos_cross_compiled.md
deleted file mode 100644
--- a/build-setup/macos_cross_compiled.md
+++ /dev/null
-
-```
-chmod +x cmake-3.23.2-Linux-x86_64.sh
-sudo ./cmake-3.23.2-Linux-x86_64.sh --skip-license --prefix=/usr/local
-```
-
-### cctools-port
-
-You need to obtain Linux ports of several Apple development tools. The easiest way to get them is to use the [cctools-port project on GitHub](https://github.com/tpoechtrager/cctools-port):
-
-```
-git clone https://github.com/tpoechtrager/cctools-port.git
-cd cctools-port/cctools
-git checkout 949.0.1-ld64-530
-export CC=clang-8
-export CXX=clang++-8
-./autogen.sh
-./configure --target=x86_64-apple-macosx10.14 --with-llvm-config=/usr/bin/llvm-config-8
-make
-sudo make install
-```
-
-The "949.0.1-ld64-530" branch in the [cctools-port repository](https://github.com/tpoechtrager/cctools-port) corresponds to the tools for macOS 10.14 Mojave and clang 8. (A different branch would be required for newer versions of the OS/compiler.)
-
diff --git a/build-setup/windows.md b/build-setup/windows.md
index 15ff2b6b92..f08820e9e6 100644
--- a/build-setup/windows.md
+++ b/build-setup/windows.md
@@ -119,33 +119,33 @@ nmake
 nmake install
 ```
 
-### Boost 1.83.0
+### Boost 1.86.0
 
-Download version 1.83.0 of Boost from <http://www.boost.org/>. You must get this exact version, as the Machine Learning build system requires it.
+Download version 1.86.0 of Boost from <http://www.boost.org/>. You must get this exact version, as the Machine Learning build system requires it.
 
 Assuming you chose the `.bz2` version, extract it in a Git bash shell using the GNU tar that comes with Git for Windows, e.g.:
 
 ```
 cd /c/tools
-tar jxvf /z/cpp_src/boost_1_83_0.tar.bz2
+tar jxvf /z/cpp_src/boost_1_86_0.tar.bz2
 ```
 
-Edit `boost/unordered/detail/prime_fmod.hpp` and change line 134 from:
+Edit `boost/unordered/detail/prime_fmod.hpp` and change line 37 from:
 
 ```
-    (13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
+    constexpr static std::size_t const sizes[] = {13ul, 29ul, 53ul, 97ul,
 ```
 
 to:
 
 ```
-    (3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \
+    constexpr static std::size_t const sizes[] = {3ul, 13ul, 29ul, 53ul, 97ul,
 ```
 
 Start a command prompt using Start Menu -> Apps -> Visual Studio 2019 -> x64 Native Tools Command Prompt for VS 2019, then in it type:
 
 ```
-cd \tools\boost_1_83_0
+cd \tools\boost_1_86_0
 bootstrap.bat
 b2 -j6 --layout=versioned --disable-icu --toolset=msvc-14.2 cxxflags="-std:c++17" linkflags="-std:c++17" --build-type=complete -sZLIB_INCLUDE="C:\tools\zlib-1.2.13" -sZLIB_LIBPATH="C:\tools\zlib-1.2.13" -sZLIB_NAME=zdll --without-context --without-coroutine --without-graph_parallel --without-mpi --without-python architecture=x86 address-model=64 optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_WIN32_WINNT=0x0601
 b2 install --prefix=C:\usr\local --layout=versioned --disable-icu --toolset=msvc-14.2 cxxflags="-std:c++17" linkflags="-std:c++17" --build-type=complete -sZLIB_INCLUDE="C:\tools\zlib-1.2.13" -sZLIB_LIBPATH="C:\tools\zlib-1.2.13" -sZLIB_NAME=zdll --without-context --without-coroutine --without-graph_parallel --without-mpi --without-python architecture=x86 address-model=64 optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_WIN32_WINNT=0x0601
 ```
 
@@ -193,7 +193,7 @@ On the "Advanced Options" screen, check "Install for all users" and "Add Python
 
 For the time being, do not take advantage of the option on the final installer screen to reconfigure the machine to allow paths longer than 260 characters. We still support Windows versions that do not have this option.
 
-### PyTorch 2.3.1
+### PyTorch 2.5.1
 
 (This step requires a lot of memory. It failed on a machine with 12GB of RAM. It just about fitted on a 20GB machine. 32GB RAM is recommended.)
@@ -209,7 +209,7 @@ Next, in a Git bash shell run:
 
 ```
 cd /c/tools
-git clone --depth=1 --branch=v2.3.1 https://github.com/pytorch/pytorch.git
+git clone --depth=1 --branch=v2.5.1 https://github.com/pytorch/pytorch.git
 cd pytorch
 git submodule sync
 git submodule update --init --recursive
@@ -265,7 +265,7 @@
 set USE_QNNPACK=OFF
 set USE_PYTORCH_QNNPACK=OFF
 set USE_XNNPACK=OFF
 set MSVC_Z7_OVERRIDE=OFF
-set PYTORCH_BUILD_VERSION=2.3.1
+set PYTORCH_BUILD_VERSION=2.5.1
 set PYTORCH_BUILD_NUMBER=1
 python setup.py install
 ```
diff --git a/build.gradle b/build.gradle
index f9ae0d56c8..fea0a7da31 100644
--- a/build.gradle
+++ b/build.gradle
@@ -435,7 +435,7 @@ task buildDependencyReport(type: Exec) {
 // This gives us the flexibility to build in different
 // ways and still use the same upload code.
 task upload(type: UploadS3Task) {
-    bucket 'prelert-artifacts'
+    bucket='prelert-artifacts'
     // Only upload the platform-specific artifacts in this task
     def zipFileDir = fileTree("${buildDir}/distributions").matching {
         include "*-aarch64.zip", "*-x86_64.zip"
@@ -447,7 +447,7 @@ }
 
 task uploadAll(type: UploadS3Task) {
-    bucket 'prelert-artifacts'
+    bucket='prelert-artifacts'
     // Upload ALL artifacts (including the dependency report) in this task
     def fileDir = fileTree("${buildDir}/distributions").matching {
         include "ml-cpp-${project.version}*.zip", "dependencies-${version}.csv"
@@ -462,7 +462,7 @@
 task uberUpload(type: UploadS3Task, dependsOn: [buildUberZipFromDownloads,
                                                 buildDependenciesZipFromDownloads,
                                                 buildNoDependenciesZipFromDownloads,
                                                 buildDependencyReport]) {
-    bucket 'prelert-artifacts'
+    bucket='prelert-artifacts'
     upload buildUberZipFromDownloads.outputs.files.singleFile, "maven/${artifactGroupPath}/${artifactName}/${project.version}/${buildUberZipFromDownloads.outputs.files.singleFile.name}"
     upload buildDependenciesZipFromDownloads.outputs.files.singleFile, "maven/${artifactGroupPath}/${artifactName}/${project.version}/${buildDependenciesZipFromDownloads.outputs.files.singleFile.name}"
     upload buildNoDependenciesZipFromDownloads.outputs.files.singleFile, "maven/${artifactGroupPath}/${artifactName}/${project.version}/${buildNoDependenciesZipFromDownloads.outputs.files.singleFile.name}"
diff --git a/catalog-info.yaml b/catalog-info.yaml
index e1105881dd..195e681d24 100644
--- a/catalog-info.yaml
+++ b/catalog-info.yaml
@@ -156,7 +156,7 @@ spec:
       build_branches: true
       build_pull_request_forks: false
       cancel_deleted_branch_builds: true
-      filter_condition: build.branch == "main" || build.branch == "8.15" || build.branch == "7.17"
+      filter_condition: build.branch == "main" || build.branch == "8.x" || build.branch == "8.16" || build.branch == "7.17"
       filter_enabled: true
      publish_blocked_as_pending: true
       publish_commit_status: false
@@ -166,12 +166,16 @@ spec:
       schedules:
         Daily 7_17:
           branch: '7.17'
-          cronline: 30 02 * * *
+          cronline: 30 03 * * *
           message: Daily SNAPSHOT build for 7.17
-        Daily 8_15:
-          branch: '8.15'
+        Daily 8_16:
+          branch: '8.16'
+          cronline: 30 02 * * *
+          message: Daily SNAPSHOT build for 8.16
+        Daily 8_x:
+          branch: '8.x'
           cronline: 30 01 * * *
-          message: Daily SNAPSHOT build for 8.15
+          message: Daily SNAPSHOT build for 8.x
         Daily main:
           branch: main
           cronline: 30 00 * * *
@@ -219,7 +223,7 @@ spec:
 
       '
       filter_enabled: true
-      trigger_mode: none
+      trigger_mode: code
       repository: elastic/ml-cpp
       skip_intermediate_builds: true
       teams:
diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake
index bd9ff030db..1749ad0a89 100644
--- a/cmake/compiler/clang.cmake
+++ b/cmake/compiler/clang.cmake
@@ -9,36 +9,11 @@
 # limitation.
 #
 
-# which compilers to use for C and C++
-if(DEFINED ENV{CPP_CROSS_COMPILE} AND NOT "$ENV{CPP_CROSS_COMPILE}" STREQUAL "")
-    message(STATUS "Cross compiling: CPP_CROSS_COMPILE = $ENV{CPP_CROSS_COMPILE}")
-
-    set(CROSS_FLAGS --sysroot=${SYSROOT} -B /usr/local/bin -target ${CROSS_TARGET_PLATFORM} -stdlib=libc++)
-    set(ML_SHARED_LINKER_FLAGS ${CROSS_FLAGS})
-    set(ML_EXE_LINKER_FLAGS ${CROSS_FLAGS})
-
-    # which compilers to use for C and C++
-    set(CMAKE_C_COMPILER "clang-8")
-    set(CMAKE_CXX_COMPILER "clang++-8")
-
-    set(CMAKE_AR "/usr/local/bin/${CROSS_TARGET_PLATFORM}-ar")
-    set(CMAKE_RANLIB "/usr/local/bin/${CROSS_TARGET_PLATFORM}-ranlib")
-    set(CMAKE_STRIP "/usr/local/bin/${CROSS_TARGET_PLATFORM}-strip")
-    set(CMAKE_LD "/usr/local/bin/${CROSS_TARGET_PLATFORM}-ld")
-
-    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> -ru <TARGET> <OBJECTS>")
-
-    # where is the target environment located
-    set(CMAKE_FIND_ROOT_PATH /usr/local/sysroot-${CROSS_TARGET_PLATFORM})
-else()
-    set(CMAKE_C_COMPILER "clang")
-    set(CMAKE_CXX_COMPILER "clang++")
-    set(CMAKE_AR "ar")
-    set(CMAKE_RANLIB "ranlib")
-    set(CMAKE_STRIP "strip")
-
-    #set(Boost_COMPILER "-clang-darwin13")
-endif()
+set(CMAKE_C_COMPILER "clang")
+set(CMAKE_CXX_COMPILER "clang++")
+set(CMAKE_AR "ar")
+set(CMAKE_RANLIB "ranlib")
+set(CMAKE_STRIP "strip")
 
 list(APPEND ML_C_FLAGS
diff --git a/cmake/variables.cmake b/cmake/variables.cmake
index a442121ce5..54a2401a47 100644
--- a/cmake/variables.cmake
+++ b/cmake/variables.cmake
@@ -160,7 +160,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
   set(CMAKE_CXX_FLAGS_RELEASE "/O2 /D NDEBUG /D EXCLUDE_TRACE_LOGGING /Qfast_transcendentals /Qvec-report:1")
   set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /O2 /D NDEBUG /D EXCLUDE_TRACE_LOGGING /Qfast_transcendentals /Qvec-report:1")
   set(CMAKE_CXX_FLAGS_DEBUG "/Zi /Od /RTC1")
-  set(CMAKE_CXX_FLAGS_SANITIZER "/fsanitize=address /Zi" CACHE STRING
+  set(CMAKE_CXX_FLAGS_SANITIZER "/fsanitize=address /O2 /Zi" CACHE STRING
     "Flags used by the C++ compiler during sanitizer builds."
     FORCE)
   set(CMAKE_EXE_LINKER_FLAGS_SANITIZER "")
@@ -173,7 +173,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
   set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING -Wdisabled-optimization -D_FORTIFY_SOURCE=2")
   set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING -Wdisabled-optimization -D_FORTIFY_SOURCE=2")
   set(CMAKE_CXX_FLAGS_DEBUG "-g")
-  set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -fno-omit-frame-pointer" CACHE STRING
+  set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -O3 -fno-omit-frame-pointer" CACHE STRING
     "Flags used by the C++ compiler during sanitizer builds."
     FORCE)
 endif()
@@ -182,7 +182,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
   set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING")
   set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O3 -DNDEBUG -DEXCLUDE_TRACE_LOGGING")
   set(CMAKE_CXX_FLAGS_DEBUG "-g")
-  set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -fno-omit-frame-pointer" CACHE STRING
+  set(CMAKE_CXX_FLAGS_SANITIZER "-fsanitize=address -g -O3 -fno-omit-frame-pointer" CACHE STRING
     "Flags used by the C++ compiler during sanitizer builds."
     FORCE)
 
 mark_as_advanced(
@@ -243,7 +243,8 @@
 set(Boost_USE_STATIC_LIBS OFF)
 set(Boost_USE_DEBUG_RUNTIME OFF)
 set(Boost_COMPILER "${ML_BOOST_COMPILER_VER}")
-find_package(Boost 1.83.0 EXACT REQUIRED COMPONENTS iostreams filesystem program_options regex date_time log log_setup thread unit_test_framework)
+set(Boost_VERSION 1.86.0)
+find_package(Boost ${Boost_VERSION} EXACT REQUIRED COMPONENTS iostreams filesystem program_options regex date_time log log_setup thread unit_test_framework)
 if(Boost_FOUND)
     list(APPEND ML_SYSTEM_INCLUDE_DIRECTORIES ${Boost_INCLUDE_DIRS})
 endif()
diff --git a/dev-tools/docker/README.md b/dev-tools/docker/README.md
index 4d5716e083..89314f61cc 100644
--- a/dev-tools/docker/README.md
+++ b/dev-tools/docker/README.md
@@ -36,7 +36,7 @@ required to build the machine learning C++ code dependencies:
  2. Change the Dockerfile and build a new image to be used for subsequent builds on this branch.
  3. Update the version to be used for builds in docker files that refer to it.
 
-### Depends on: ml-linux-build:30
+### Depends on: ml-linux-build:32
 
 ### Build script: dev-tools/docker/build_linux_dependency_builder_image.sh
 
@@ -44,7 +44,7 @@
 ## Repository: ml-linux-build
 
-### Latest version: 30
+### Latest version: 32
 
 ### Comments
 A Docker image that can be used to compile the machine learning
@@ -63,7 +63,7 @@ used for subsequent builds on this branch.
 
 ## Repository: ml-linux-aarch64-cross-build
 
-### Latest version: 12
+### Latest version: 15
 
 ### Comments
 A Docker image that can be used to compile the machine learning
@@ -82,7 +82,7 @@ used for subsequent builds on this branch.
 
 ## Repository: ml-linux-aarch64-native-build
 
-### Latest version: 12
+### Latest version: 15
 
 ### Comments
 A Docker image that can be used to compile the machine learning
@@ -129,24 +129,3 @@ This image is not intended to be built regularly. When changing the
 
 ### Build script: dev-tools/docker/build_check_style_image.sh
-
-
-## REPOSITORY: ml-macosx-build
-
-### VERSION: 18
-
-### Comments
-A Docker image that can be used to **cross compile** the machine learning
-C++ code for Intel macOS
-
-This image is not intended to be built regularly. When changing the tools
-or 3rd party components required to build the machine learning C++ code:
-
-
- 1. increment the version
- 2. Change the Dockerfile and build a new image to be
-used for subsequent builds on this branch.
- 3. Update the version to be used for builds in *dev-tools/docker/macosx_builder/Dockerfile*.
-
-### Build script: dev-tools/docker/build_macosx_build_image.sh
-
diff --git a/dev-tools/docker/build_linux_aarch64_cross_build_image.sh b/dev-tools/docker/build_linux_aarch64_cross_build_image.sh
index 32263288b8..4289d1c72a 100755
--- a/dev-tools/docker/build_linux_aarch64_cross_build_image.sh
+++ b/dev-tools/docker/build_linux_aarch64_cross_build_image.sh
@@ -22,7 +22,7 @@
 HOST=docker.elastic.co
 ACCOUNT=ml-dev
 REPOSITORY=ml-linux-aarch64-cross-build
-VERSION=13
+VERSION=15
 
 set -e
diff --git a/dev-tools/docker/build_macosx_build_image.sh b/dev-tools/docker/build_macosx_build_image.sh
deleted file mode 100755
index 3c915f62da..0000000000
--- a/dev-tools/docker/build_macosx_build_image.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License
-# 2.0 and the following additional limitation.
 Functionality enabled by the
-# files subject to the Elastic License 2.0 may only be used in production when
-# invoked by an Elasticsearch process with a license key installed that permits
-# use of machine learning features. You may not use this file except in
-# compliance with the Elastic License 2.0 and the foregoing additional
-# limitation.
-#
-
-# Builds the Docker image that can be used to compile the machine learning
-# C++ code for Intel macOS
-#
-# This script is not intended to be run regularly. When changing the tools
-# or 3rd party components required to build the machine learning C++ code
-# increment the version, change the Dockerfile and build a new image to be
-# used for subsequent builds on this branch. Then update the version to be
-# used for builds in docker/macosx_builder/Dockerfile.
-
-HOST=docker.elastic.co
-ACCOUNT=ml-dev
-REPOSITORY=ml-macosx-build
-VERSION=19
-
-set -e
-
-cd `dirname $0`
-
-. ./prefetch_docker_image.sh
-CONTEXT=macosx_image
-prefetch_docker_base_image $CONTEXT/Dockerfile
-docker build --no-cache -t $HOST/$ACCOUNT/$REPOSITORY:$VERSION $CONTEXT
-# Get a username and password for this by visiting
-# https://docker-auth.elastic.co and allowing it to authenticate against your
-# GitHub account
-docker login $HOST
-docker push $HOST/$ACCOUNT/$REPOSITORY:$VERSION
-
diff --git a/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile b/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile
index 19bc1611a6..4169c321a0 100644
--- a/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile
+++ b/dev-tools/docker/linux_aarch64_cross_builder/Dockerfile
@@ -10,7 +10,7 @@
 #
 
 # Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-linux-aarch64-cross-build:13
+FROM docker.elastic.co/ml-dev/ml-linux-aarch64-cross-build:15
 
 MAINTAINER David Roberts
diff --git a/dev-tools/docker/linux_aarch64_cross_image/Dockerfile b/dev-tools/docker/linux_aarch64_cross_image/Dockerfile
index ffd0d6f289..20559d1bb0 100644
--- a/dev-tools/docker/linux_aarch64_cross_image/Dockerfile
+++ b/dev-tools/docker/linux_aarch64_cross_image/Dockerfile
@@ -27,7 +27,7 @@ RUN \
 
 RUN \
     mkdir -p /usr/local/sysroot-aarch64-linux-gnu/usr && \
     cd /usr/local/sysroot-aarch64-linux-gnu/usr && \
-    wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/usr-aarch64-linux-gnu-13.tar.bz2 | tar jxf - && \
+    wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/usr-aarch64-linux-gnu-15.tar.bz2 | tar jxf - && \
     cd .. && \
     ln -s usr/lib lib && \
     ln -s usr/lib64 lib64
diff --git a/dev-tools/docker/linux_aarch64_native_builder/Dockerfile b/dev-tools/docker/linux_aarch64_native_builder/Dockerfile
index 223ab2d974..ec326a4753 100644
--- a/dev-tools/docker/linux_aarch64_native_builder/Dockerfile
+++ b/dev-tools/docker/linux_aarch64_native_builder/Dockerfile
@@ -10,7 +10,7 @@
 #
 
 # Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:13
+FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:15
 
 MAINTAINER David Roberts
diff --git a/dev-tools/docker/linux_aarch64_native_image/Dockerfile b/dev-tools/docker/linux_aarch64_native_image/Dockerfile
index a8c75b5240..7f533f2249 100644
--- a/dev-tools/docker/linux_aarch64_native_image/Dockerfile
+++ b/dev-tools/docker/linux_aarch64_native_image/Dockerfile
@@ -78,14 +78,14 @@ RUN \
 # Build Boost
 RUN \
     cd ${build_dir} && \
-    wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.83.0/source/boost_1_83_0.tar.bz2 | tar jxf - && \
-    cd boost_1_83_0 && \
+    wget --quiet -O - https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \
+    cd boost_1_86_0 && \
     ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \
-    sed -i -e 's|(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|(3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|' boost/unordered/detail/prime_fmod.hpp && \
+    sed -i -e 's/{13ul/{3ul, 13ul/' boost/unordered/detail/prime_fmod.hpp && \
     ./b2 -j`nproc` --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -march=armv8-a+crc+crypto' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \
     ./b2 install --prefix=/usr/local/gcc103 --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -march=armv8-a+crc+crypto' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \
     cd ..
 && \
-    rm -rf boost_1_83_0
+    rm -rf boost_1_86_0
 
 # Build patchelf
 RUN \
@@ -141,11 +141,12 @@
 # If the PyTorch branch is changed also update PYTORCH_BUILD_VERSION
 RUN \
     cd ${build_dir} && \
-    git -c advice.detachedHead=false clone --depth=1 --branch=v2.3.1 https://github.com/pytorch/pytorch.git && \
+    git -c advice.detachedHead=false clone --depth=1 --branch=v2.5.1 https://github.com/pytorch/pytorch.git && \
     cd pytorch && \
     git submodule sync && \
     git submodule update --init --recursive && \
     sed -i -e 's/system(/strlen(/' torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp && \
+    sed -i -e '104 i set(PYTHON_EXECUTABLE "/usr/local/bin/python3.10")' ./third_party/onnx/CMakeLists.txt && \
     export BLAS=Eigen && \
     export BUILD_TEST=OFF && \
     export USE_FBGEMM=OFF && \
@@ -154,7 +155,7 @@
     export USE_MKLDNN=ON && \
     export USE_QNNPACK=OFF && \
     export USE_PYTORCH_QNNPACK=OFF && \
-    export PYTORCH_BUILD_VERSION=2.3.1 && \
+    export PYTORCH_BUILD_VERSION=2.5.1 && \
     export PYTORCH_BUILD_NUMBER=1 && \
     /usr/local/bin/python3.10 setup.py install && \
     mkdir /usr/local/gcc103/include/pytorch && \
diff --git a/dev-tools/docker/linux_aarch64_native_tester/Dockerfile b/dev-tools/docker/linux_aarch64_native_tester/Dockerfile
index eb528694e1..3cccfbfc36 100644
--- a/dev-tools/docker/linux_aarch64_native_tester/Dockerfile
+++ b/dev-tools/docker/linux_aarch64_native_tester/Dockerfile
@@ -10,7 +10,7 @@
 #
 
 # Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:13
+FROM docker.elastic.co/ml-dev/ml-linux-aarch64-native-build:15
 
 MAINTAINER David Roberts
diff --git a/dev-tools/docker/linux_builder/Dockerfile b/dev-tools/docker/linux_builder/Dockerfile
index f21591e988..f9f2ecdc0d 100644
--- a/dev-tools/docker/linux_builder/Dockerfile
+++ b/dev-tools/docker/linux_builder/Dockerfile
@@ -10,7 +10,7 @@
 #
 
 # Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-linux-build:30
+FROM docker.elastic.co/ml-dev/ml-linux-build:32
 
 MAINTAINER David Roberts
diff --git a/dev-tools/docker/linux_dependency_builder_image/Dockerfile b/dev-tools/docker/linux_dependency_builder_image/Dockerfile
index 9b57f974c1..33defeecc1 100644
--- a/dev-tools/docker/linux_dependency_builder_image/Dockerfile
+++ b/dev-tools/docker/linux_dependency_builder_image/Dockerfile
@@ -10,7 +10,7 @@
 #
 
 # Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-linux-build:30 AS builder
+FROM docker.elastic.co/ml-dev/ml-linux-build:32 AS builder
 
 # This is basically automating the setup instructions in build-setup/linux.md
diff --git a/dev-tools/docker/linux_image/Dockerfile b/dev-tools/docker/linux_image/Dockerfile
index 99e957a363..65ba958779 100644
--- a/dev-tools/docker/linux_image/Dockerfile
+++ b/dev-tools/docker/linux_image/Dockerfile
@@ -78,14 +78,14 @@ RUN \
 # Build Boost
 RUN \
     cd ${build_dir} && \
-    wget --quiet -O - https://boostorg.jfrog.io/artifactory/main/release/1.83.0/source/boost_1_83_0.tar.bz2 | tar jxf - && \
-    cd boost_1_83_0 && \
+    wget --quiet -O - https://archives.boost.io/release/1.86.0/source/boost_1_86_0.tar.bz2 | tar jxf - && \
+    cd boost_1_86_0 && \
     ./bootstrap.sh --without-libraries=context --without-libraries=coroutine --without-libraries=graph_parallel --without-libraries=mpi --without-libraries=python --without-icu && \
-    sed -i -e 's|(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|(3ul)(13ul)(29ul)(53ul)(97ul)(193ul)(389ul)(769ul)(1543ul)(3079ul)(6151ul)( \\|' boost/unordered/detail/prime_fmod.hpp
 && \
+    sed -i -e 's/{13ul/{3ul, 13ul/' boost/unordered/detail/prime_fmod.hpp && \
     ./b2 -j`nproc` --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -msse4.2 -mfpmath=sse' cflags='-D__STDC_FORMAT_MACROS' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \
     ./b2 install --prefix=/usr/local/gcc103 --layout=versioned --disable-icu pch=off optimization=speed inlining=full define=BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS define=BOOST_LOG_WITHOUT_DEBUG_OUTPUT define=BOOST_LOG_WITHOUT_EVENT_LOG define=BOOST_LOG_WITHOUT_SYSLOG define=BOOST_LOG_WITHOUT_IPC define=_FORTIFY_SOURCE=2 cxxflags='-std=gnu++17 -fstack-protector -msse4.2 -mfpmath=sse' cflags='-D__STDC_FORMAT_MACROS' linkflags='-std=gnu++17 -Wl,-z,relro -Wl,-z,now' && \
     cd .. && \
-    rm -rf boost_1_83_0
+    rm -rf boost_1_86_0
 
 # Build patchelf
 RUN \
@@ -154,11 +154,12 @@ gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.
 # If the PyTorch branch is changed also update PYTORCH_BUILD_VERSION
 RUN \
     cd ${build_dir} && \
-    git -c advice.detachedHead=false clone --depth=1 --branch=v2.3.1 https://github.com/pytorch/pytorch.git && \
+    git -c advice.detachedHead=false clone --depth=1 --branch=v2.5.1 https://github.com/pytorch/pytorch.git && \
     cd pytorch && \
     git submodule sync && \
     git submodule update --init --recursive && \
     sed -i -e 's/system(/strlen(/' torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
 && \
+    sed -i -e '104 i set(PYTHON_EXECUTABLE "/usr/local/bin/python3.10")' ./third_party/onnx/CMakeLists.txt && \
     export BLAS=MKL && \
     export BUILD_TEST=OFF && \
     export BUILD_CAFFE2=OFF && \
@@ -168,7 +169,7 @@ RUN \
     export USE_QNNPACK=OFF && \
     export USE_PYTORCH_QNNPACK=OFF && \
     export USE_XNNPACK=OFF && \
-    export PYTORCH_BUILD_VERSION=2.3.1 && \
+    export PYTORCH_BUILD_VERSION=2.5.1 && \
     export PYTORCH_BUILD_NUMBER=1 && \
     export MAX_JOBS=10 && \
     /usr/local/bin/python3.10 setup.py install && \
diff --git a/dev-tools/docker/linux_tester/Dockerfile b/dev-tools/docker/linux_tester/Dockerfile
index b92bafc56c..8c7f6c6eb4 100644
--- a/dev-tools/docker/linux_tester/Dockerfile
+++ b/dev-tools/docker/linux_tester/Dockerfile
@@ -10,7 +10,7 @@
 #
 
 # Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-linux-build:30
+FROM docker.elastic.co/ml-dev/ml-linux-build:32
 
 MAINTAINER David Roberts
diff --git a/dev-tools/docker/macosx_builder/Dockerfile b/dev-tools/docker/macosx_builder/Dockerfile
deleted file mode 100644
index 74f49698b4..0000000000
--- a/dev-tools/docker/macosx_builder/Dockerfile
+++ /dev/null
@@ -1,37 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License
-# 2.0 and the following additional limitation. Functionality enabled by the
-# files subject to the Elastic License 2.0 may only be used in production when
-# invoked by an Elasticsearch process with a license key installed that permits
-# use of machine learning features. You may not use this file except in
-# compliance with the Elastic License 2.0 and the foregoing additional
-# limitation.
-#
-
-# Increment the version here when a new tools/3rd party components image is built
-FROM docker.elastic.co/ml-dev/ml-macosx-build:19
-
-MAINTAINER David Roberts
-
-# Copy the current Git repository into the container
-COPY . /ml-cpp/
-
-# Tell the build we want to cross compile
-ENV CPP_CROSS_COMPILE macosx
-
-ENV CMAKE_FLAGS -DCMAKE_TOOLCHAIN_FILE=/ml-cpp/cmake/darwin-x86_64.cmake
-
-# Pass through any version qualifier (default none)
-ARG VERSION_QUALIFIER=
-
-# Pass through whether this is a snapshot build (default yes if not specified)
-ARG SNAPSHOT=yes
-
-# Pass through ML debug option (default blank)
-ARG ML_DEBUG=
-
-# Run the build
-RUN \
-    /ml-cpp/dev-tools/docker/docker_entrypoint.sh
-
diff --git a/dev-tools/docker/macosx_image/Dockerfile b/dev-tools/docker/macosx_image/Dockerfile
deleted file mode 100644
index 4f5db531a0..0000000000
--- a/dev-tools/docker/macosx_image/Dockerfile
+++ /dev/null
@@ -1,59 +0,0 @@
-#
-# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-# or more contributor license agreements. Licensed under the Elastic License
-# 2.0 and the following additional limitation. Functionality enabled by the
-# files subject to the Elastic License 2.0 may only be used in production when
-# invoked by an Elasticsearch process with a license key installed that permits
-# use of machine learning features. You may not use this file except in
-# compliance with the Elastic License 2.0 and the foregoing additional
-# limitation.
-#
-
-FROM ubuntu:20.04
-
-# This is basically automating the setup instructions in build-setup/macos_cross_compiled.md
-
-MAINTAINER David Roberts
-
-# Make sure apt-get is up to date and required packages are installed
-RUN \
-    export DEBIAN_FRONTEND=noninteractive && \
-    apt-get update && \
-    apt-get install --no-install-recommends -y apt-utils automake autogen build-essential bzip2 git gobjc gpg-agent libtool software-properties-common unzip wget zip
-
-# Install clang
-RUN \
-    wget --quiet -O - http://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
-    apt-add-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal main" && \
-    apt-get install --no-install-recommends -y clang-8 libclang1-8 libllvm8 llvm-8 llvm-8-runtime
-
-# Add build dependencies transferred from native Mac build server
-RUN \
-    mkdir -p /usr/local/sysroot-x86_64-apple-macosx10.14/usr && \
-    cd /usr/local/sysroot-x86_64-apple-macosx10.14/usr && \
-    wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/usr-x86_64-apple-macosx10.14-10.tar.bz2 | tar jxf - && \
-    wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/xcode-x86_64-apple-macosx10.14-1.tar.bz2 | tar jxf - && \
-    wget --quiet -O - https://s3-eu-west-2.amazonaws.com/ml-cpp-artifacts/dependencies/sdk-x86_64-apple-macosx10.14-1.tar.bz2 | tar jxf -
-
-# Build cctools-port
-RUN \
-    git clone https://github.com/tpoechtrager/cctools-port.git && \
-    cd cctools-port/cctools && \
-    git checkout 949.0.1-ld64-530 && \
-    export CC=clang-8 && \
-    export CXX=clang++-8 && \
-    ./autogen.sh && \
-    ./configure --target=x86_64-apple-macosx10.14 --with-llvm-config=/usr/bin/llvm-config-8 && \
-    make -j`nproc` && \
-    make install && \
-    cd ../..
&& \ - rm -rf cctools-port - -# Install CMake -# v3.19.2 minimum is required -RUN \ - wget --quiet https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2-Linux-x86_64.sh && \ - chmod +x cmake-3.23.2-Linux-x86_64.sh && \ - ./cmake-3.23.2-Linux-x86_64.sh --skip-license --prefix=/usr/local && \ - rm -f cmake-3.23.2-Linux-x86_64.sh - diff --git a/dev-tools/docker_build.sh b/dev-tools/docker_build.sh index 47f1064f91..fc1eac34aa 100755 --- a/dev-tools/docker_build.sh +++ b/dev-tools/docker_build.sh @@ -10,8 +10,7 @@ # limitation. # -# Builds the machine learning C++ code for Linux or macOS in a Docker -# container. +# Builds the machine learning C++ code for Linux in a Docker container. # # The output .zip files are then copied out of the container to the # location in the current repository that they'd be in had they been @@ -20,7 +19,7 @@ # Finally, the Docker container used for the build is deleted. usage() { - echo "Usage: $0 linux|linux_aarch64_cross|linux_aarch64_native|macosx ..." + echo "Usage: $0 linux|linux_aarch64_cross|linux_aarch64_native ..." exit 1 } @@ -30,7 +29,7 @@ while [ -n "$1" ] do case "$1" in - linux|linux_aarch64_cross|linux_aarch64_native|macosx) + linux|linux_aarch64_cross|linux_aarch64_native) PLATFORMS="$1 $PLATFORMS" ;; *) diff --git a/dev-tools/download_macos_deps.sh b/dev-tools/download_macos_deps.sh index ea2f7d29ce..0b7294fdab 100755 --- a/dev-tools/download_macos_deps.sh +++ b/dev-tools/download_macos_deps.sh @@ -28,7 +28,7 @@ case `uname -m` in ARCHIVE=local-x86_64-apple-macosx12.0-1.tar.bz2 ;; arm64) - ARCHIVE=local-arm64-apple-macosx11.1-10.tar.bz2 + ARCHIVE=local-arm64-apple-macosx11.1-12.tar.bz2 ;; *) diff --git a/dev-tools/download_windows_deps.ps1 b/dev-tools/download_windows_deps.ps1 index ac04854f19..dbda289947 100755 --- a/dev-tools/download_windows_deps.ps1 +++ b/dev-tools/download_windows_deps.ps1 @@ -9,11 +9,11 @@ # limitation. 
 #
 $ErrorActionPreference="Stop"
-$Archive="usr-x86_64-windows-2016-13.zip"
+$Archive="usr-x86_64-windows-2016-15.zip"
 $Destination="C:\"
-# If PyTorch is not version 2.3.1 then we need the latest download
+# If PyTorch is not version 2.5.1 then we need the latest download
 if (!(Test-Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h") -Or
-    !(Select-String -Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h" -Pattern "2.3.1" -Quiet)) {
+    !(Select-String -Path "$Destination\usr\local\include\pytorch\torch\csrc\api\include\torch\version.h" -Pattern "2.5.1" -Quiet)) {
     Remove-Item "$Destination\usr" -Recurse -Force -ErrorAction Ignore
     $ZipSource="https://storage.googleapis.com/elastic-ml-public/dependencies/$Archive"
     $ZipDestination="$Env:TEMP\$Archive"
diff --git a/dev-tools/strip_binaries.sh b/dev-tools/strip_binaries.sh
index eef5933474..ca4d7103fe 100755
--- a/dev-tools/strip_binaries.sh
+++ b/dev-tools/strip_binaries.sh
@@ -21,13 +21,8 @@ case `uname` in
         ;;
     Linux)
-        if [ "$CPP_CROSS_COMPILE" = macosx ] ; then
-            EXE_DIR="$ML_APP_NAME.app/Contents/MacOS"
-            DYNAMIC_LIB_DIR="$ML_APP_NAME.app/Contents/lib"
-        else
-            EXE_DIR=bin
-            DYNAMIC_LIB_DIR=lib
-        fi
+        EXE_DIR=bin
+        DYNAMIC_LIB_DIR=lib
         ;;
 esac
@@ -91,23 +86,6 @@ case `uname` in
             strip --strip-unneeded $LIBRARY
             objcopy --add-gnu-debuglink="$LIBRARY-debug" "$LIBRARY"
         done
-    elif [ "$CPP_CROSS_COMPILE" = macosx ] ; then
-        CROSS_TARGET_PLATFORM=x86_64-apple-macosx10.14
-        for PROGRAM in `ls -1d "$EXE_DIR"/* | grep -v '\.dSYM$'`
-        do
-            echo "Stripping $PROGRAM"
-            dsymutil-8 $PROGRAM
-            /usr/local/bin/$CROSS_TARGET_PLATFORM-strip -u -r $PROGRAM
-        done
-        for LIBRARY in `ls -1d "$DYNAMIC_LIB_DIR"/* | grep -v '\.dSYM$'`
-        do
-            echo "Stripping $LIBRARY"
-            case $LIBRARY in
-                *Ml*)
-                    dsymutil-8 $LIBRARY
-            esac
-            /usr/local/bin/$CROSS_TARGET_PLATFORM-strip -x $LIBRARY
-        done
     else
         CROSS_TARGET_PLATFORM=$CPP_CROSS_COMPILE-linux-gnu
         for PROGRAM in `ls -1 "$EXE_DIR"/* | egrep -v "$EXE_DIR"'/core|-debug$'`
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 6f98ce1ff8..a77ed44de5 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -28,6 +28,38 @@
 //=== Regressions
+== {es} version 8.19.0
+
+=== Enhancements
+
+* Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].)
+* Better messaging regarding OOM process termination. (See {ml-pull}2841[#2841].)
+
+== {es} version 8.18.0
+
+=== Enhancements
+
+* Update the PyTorch library to version 2.5.1. (See {ml-pull}2798[#2798], {ml-pull}2799[#2799].)
+* Upgrade Boost libraries to version 1.86. (See {ml-pull}2780[#2780], {ml-pull}2779[#2779].)
+* Drop support for macOS Intel builds. (See {ml-pull}2795[#2795].)
+
+== {es} version 8.17.7
+
+=== Enhancements
+* Restrict file system access for PyTorch models. (See {ml-pull}2851[#2851].)
+
+== {es} version 8.16.6
+
+=== Bug Fixes
+
+* Correct handling of config updates. (See {ml-pull}2821[#2821].)
+
+== {es} version 8.16.4
+
+=== Bug Fixes
+
+* Increase the upper limits for the Boost.JSON SAX parser. (See {ml-pull}2809[#2809].)
+
 == {es} version 8.16.0
 
 === Enhancements
@@ -35,6 +67,16 @@
 * Allow the user to force a detector to shift time series state by a specific amount. (See {ml-pull}2695[#2695].)
+=== Bug Fixes
+
+* Allow for pytorch_inference results to include zero-dimensional tensors.
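
A zero-dimensional tensor is a scalar result: it has an empty shape but still carries a single value, which is what the bullet above refers to. A minimal libtorch sketch of such a tensor (illustrative only; this is not the pytorch_inference result-writing code):

    #include <torch/torch.h>
    #include <iostream>

    int main() {
        // torch::tensor with a single scalar yields a zero-dimensional tensor:
        // it has an empty shape but still carries one value.
        torch::Tensor scalar = torch::tensor(3.5);
        std::cout << "dim: " << scalar.dim() << '\n';      // prints "dim: 0"
        std::cout << "numel: " << scalar.numel() << '\n';  // prints "numel: 1"
        // item<T>() is the supported way to read the value out of a 0-d tensor.
        std::cout << "value: " << scalar.item<double>() << '\n';
        return 0;
    }
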
+ +== {es} version 8.15.4 + +=== Bug Fixes + +* Fix parameter initialization for large forecasting models. (See {ml-pull}2759[#2759].) + == {es} version 8.15.2 === Enhancements diff --git a/gradle.properties b/gradle.properties index cf665d57c6..76bbd6229d 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,6 @@ org.gradle.daemon=false -elasticsearchVersion=8.16.0 +elasticsearchVersion=8.19.7 artifactName=ml-cpp diff --git a/include/api/CAnomalyJob.h b/include/api/CAnomalyJob.h index e4f1f452cb..424b065339 100644 --- a/include/api/CAnomalyJob.h +++ b/include/api/CAnomalyJob.h @@ -37,6 +37,7 @@ namespace CAnomalyJobTest { struct testParsePersistControlMessageArgs; +struct testConfigUpdate; struct testOutputBucketResultsUntilGivenIncompleteInitialBucket; } @@ -427,6 +428,8 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { //! be pruned, i.e. those which are so old as to be effectively dead. void pruneAllModels(std::size_t buckets = 0); + const model::CHierarchicalResultsNormalizer& normalizer() const; + private: //! The job ID std::string m_JobId; @@ -521,6 +524,7 @@ class API_EXPORT CAnomalyJob : public CDataProcessor { core_t::TTime m_InitialLastFinalisedBucketEndTime{0}; // Test case access + friend struct CAnomalyJobTest::testConfigUpdate; friend struct CAnomalyJobTest::testParsePersistControlMessageArgs; friend struct CAnomalyJobTest::testOutputBucketResultsUntilGivenIncompleteInitialBucket; diff --git a/include/api/CAnomalyJobConfig.h b/include/api/CAnomalyJobConfig.h index 3fe56a5402..7f9cae0edd 100644 --- a/include/api/CAnomalyJobConfig.h +++ b/include/api/CAnomalyJobConfig.h @@ -239,7 +239,19 @@ class API_EXPORT CAnomalyJobConfig { } void initRuleFilters(const CDetectionRulesJsonParser::TStrPatternSetUMap& ruleFilters) { - m_RuleFilters = ruleFilters; + // Update or insert values that are in the new map - we never delete filters at this level. + // Note that we can't simply assign "m_RuleFilters = ruleFilters", as that would result in + // the pattern set objects being destroyed and, as they are referenced by the anomaly detector models, + // this is a bad thing. + for (const auto& kv : ruleFilters) { + CDetectionRulesJsonParser::TStrPatternSetUMap::iterator itr = + m_RuleFilters.find(kv.first); + if (itr != m_RuleFilters.end()) { + itr->second = kv.second; + } else { + m_RuleFilters.insert(kv); + } + } } void initScheduledEvents(const TStrDetectionRulePrVec& scheduledEvents) { @@ -249,19 +261,6 @@ class API_EXPORT CAnomalyJobConfig { //! Parse a JSON value representing an entire analysis config object. void parse(const json::value& json); - //! Return a JSON string representing the analysis config - const std::string& getAnalysisConfig(); - - //! Reparse the detector configuration object from within a stored - //! string representing the analysis config object. - //! This is necessary to correctly reinitialise scoped rule objects - //! folowing an update of the fiter rules configuration. - bool reparseDetectorsFromStoredConfig(const std::string& analysisConfig); - - void setConfig(const std::string& analysisConfigString) { - m_AnalysisConfigString = analysisConfigString; - } - core_t::TTime bucketSpan() const { return m_BucketSpan; } //! Return the size of the model prune window expressed as a whole number of seconds. 
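
The initRuleFilters change above assigns updated filters through the existing map slots rather than reassigning the whole map, because the pattern sets are referenced by live anomaly detector models. A standalone sketch of that update-or-insert idiom, with std::unordered_map and std::string standing in for the ml-cpp types (names here are illustrative):

    #include <iostream>
    #include <string>
    #include <unordered_map>

    int main() {
        std::unordered_map<std::string, std::string> current{{"safe_ips", "old"}};
        // A reference held elsewhere, e.g. by an anomaly detector model.
        const std::string& heldReference = current["safe_ips"];

        std::unordered_map<std::string, std::string> update{{"safe_ips", "new"},
                                                            {"other", "x"}};
        for (const auto& kv : update) {
            auto itr = current.find(kv.first);
            if (itr != current.end()) {
                itr->second = kv.second; // update in place: heldReference stays valid
            } else {
                current.insert(kv); // new keys are added; nothing is ever deleted
            }
        }
        std::cout << heldReference << '\n'; // prints "new"
        return 0;
    }

References into an unordered_map remain valid across inserts and rehashes, which is what makes the in-place assignment safe where a wholesale `m_RuleFilters = ruleFilters` would not be.
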
diff --git a/include/api/CResultNormalizer.h b/include/api/CResultNormalizer.h
index b4fbf6ddac..276b8196e3 100644
--- a/include/api/CResultNormalizer.h
+++ b/include/api/CResultNormalizer.h
@@ -81,7 +81,8 @@ class API_EXPORT CResultNormalizer {
 public:
     CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig,
-                      CSimpleOutputWriter& outputWriter);
+                      CSimpleOutputWriter& outputWriter,
+                      model::CLimits& limits);
 
     //! Initialise the system change normalizer
     bool initNormalizer(const std::string& stateFileName);
diff --git a/include/core/BoostJsonConstants.h b/include/core/BoostJsonConstants.h
index 3bc75f33d9..059f0aa499 100644
--- a/include/core/BoostJsonConstants.h
+++ b/include/core/BoostJsonConstants.h
@@ -13,6 +13,7 @@
 #define INCLUDED_ml_core_CBoostJsonConstants_h
 
 #include <cstddef>
+#include <limits>
 
 namespace ml {
 namespace core {
@@ -21,16 +22,16 @@ namespace boost_json_constants {
 // Constants that set upper limits for Boost.JSON SAX style parsing
 
 // The maximum number of elements allowed in an object
-constexpr std::size_t MAX_OBJECT_SIZE = 1'000'000;
+constexpr std::size_t MAX_OBJECT_SIZE = std::numeric_limits<std::size_t>::max();
 
 // The maximum number of elements allowed in an array
-constexpr std::size_t MAX_ARRAY_SIZE = 1'000'000;
+constexpr std::size_t MAX_ARRAY_SIZE = std::numeric_limits<std::size_t>::max();
 
 // The maximum number of characters allowed in a key
-constexpr std::size_t MAX_KEY_SIZE = 1 << 10;
+constexpr std::size_t MAX_KEY_SIZE = std::numeric_limits<std::size_t>::max();
 
 // The maximum number of characters allowed in a string
-constexpr std::size_t MAX_STRING_SIZE = 1 << 30;
+constexpr std::size_t MAX_STRING_SIZE = std::numeric_limits<std::size_t>::max();
 }
 }
 }
diff --git a/include/core/CFlatPrefixTree.h b/include/core/CFlatPrefixTree.h
index 7addcfed58..fc065e0eda 100644
--- a/include/core/CFlatPrefixTree.h
+++ b/include/core/CFlatPrefixTree.h
@@ -63,6 +63,8 @@ class CORE_EXPORT CFlatPrefixTree {
         SNode(char c, char type, std::uint32_t next);
 
+        std::uint64_t checksum() const;
+
         bool operator<(char rhs) const;
 
         char s_Char;
         char s_Type;
@@ -120,6 +122,8 @@ class CORE_EXPORT CFlatPrefixTree {
     //! Pretty-prints the tree.
     std::string print() const;
 
+    std::uint64_t checksum() const;
+
 private:
     //! The recursive building helper.
     void buildRecursively(const TStrVec& prefixes,
diff --git a/include/core/CPatternSet.h b/include/core/CPatternSet.h
index 8b00d6564d..a3d1e0aa04 100644
--- a/include/core/CPatternSet.h
+++ b/include/core/CPatternSet.h
@@ -61,6 +61,8 @@ class CORE_EXPORT CPatternSet {
     //! Clears the set.
     void clear();
 
+    std::uint64_t checksum() const;
+
 private:
     void sortAndPruneDuplicates(TStrVec& keys);
diff --git a/include/model/CAnomalyDetectorModel.h b/include/model/CAnomalyDetectorModel.h
index 2e760151ed..cb6aa71a30 100644
--- a/include/model/CAnomalyDetectorModel.h
+++ b/include/model/CAnomalyDetectorModel.h
@@ -492,11 +492,19 @@ class MODEL_EXPORT CAnomalyDetectorModel {
     //! Apply time shift at the time \p time by \p shift amount of seconds.
     virtual void shiftTime(core_t::TTime time, core_t::TTime shift) = 0;
 
+    //! Check if the rule has been applied.
+    bool checkRuleApplied(const CDetectionRule& rule) const;
+
+    //! Mark the rule as applied.
+    void markRuleApplied(const CDetectionRule& rule);
+
 protected:
     using TStrCRef = std::reference_wrapper<const std::string>;
     using TSizeSize1VecUMap = boost::unordered_map<std::size_t, TSize1Vec>;
     using TFeatureSizeSize1VecUMapPr = std::pair<model_t::EFeature, TSizeSize1VecUMap>;
     using TFeatureSizeSize1VecUMapPrVec = std::vector<TFeatureSizeSize1VecUMapPr>;
+    using TUint64TTimePr = std::pair<std::uint64_t, core_t::TTime>;
+    using TUint64TTimePrVec = std::vector<TUint64TTimePr>;
 
     //! \brief The feature models.
struct MODEL_EXPORT SFeatureModels { @@ -710,6 +718,9 @@ class MODEL_EXPORT CAnomalyDetectorModel { CAnnotation::EEvent type, const std::string& annotation) = 0; + TUint64TTimePrVec& appliedRuleChecksums(); + const TUint64TTimePrVec& appliedRuleChecksums() const; + private: using TModelParamsCRef = std::reference_wrapper; @@ -738,6 +749,9 @@ class MODEL_EXPORT CAnomalyDetectorModel { //! The influence calculators to use for each feature which is being //! modeled. TFeatureInfluenceCalculatorCPtrPrVecVec m_InfluenceCalculators; + + //! Checksums of the rules that should be applied only once. + TUint64TTimePrVec m_AppliedRuleChecksums; }; class CMemoryCircuitBreaker : public core::CMemoryCircuitBreaker { diff --git a/include/model/CDetectionRule.h b/include/model/CDetectionRule.h index f7dd4f0a8b..4bf3dd0165 100644 --- a/include/model/CDetectionRule.h +++ b/include/model/CDetectionRule.h @@ -65,6 +65,9 @@ class MODEL_EXPORT CDetectionRule { //! Add a condition. void addCondition(const CRuleCondition& condition); + //! Clear conditions. + void clearConditions(); + //! Set callback function to apply some action to a supplied time series model. void setCallback(TCallback cb); @@ -88,6 +91,9 @@ class MODEL_EXPORT CDetectionRule { //! Pretty-print the rule. std::string print() const; + //! Checksum the rule. + std::uint64_t checksum() const; + private: std::string printAction() const; @@ -105,6 +111,9 @@ class MODEL_EXPORT CDetectionRule { //! Callback function to apply a change to a model based on the rule action. TCallback m_Callback; + + //! The time shift to apply to the model. + core_t::TTime m_TimeShift{0}; }; } } diff --git a/include/model/CHierarchicalResultsAggregator.h b/include/model/CHierarchicalResultsAggregator.h index a83d01ae61..698fddd729 100644 --- a/include/model/CHierarchicalResultsAggregator.h +++ b/include/model/CHierarchicalResultsAggregator.h @@ -12,6 +12,8 @@ #ifndef INCLUDED_ml_model_CHierarchicalResultsAggregator_h #define INCLUDED_ml_model_CHierarchicalResultsAggregator_h +#include + #include #include #include diff --git a/include/model/CHierarchicalResultsLevelSet.h b/include/model/CHierarchicalResultsLevelSet.h index 3083ca19d3..48da35f406 100644 --- a/include/model/CHierarchicalResultsLevelSet.h +++ b/include/model/CHierarchicalResultsLevelSet.h @@ -12,6 +12,7 @@ #ifndef INCLUDED_ml_model_CHierarchicalResultsLevelSet_h #define INCLUDED_ml_model_CHierarchicalResultsLevelSet_h +#include "model/ImportExport.h" #include #include @@ -21,6 +22,10 @@ #include +namespace CHierarchicalResultsLevelSetTest { +struct testMemoryUsage; +} + namespace ml { namespace model { @@ -240,6 +245,26 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { return maths::common::CChecksum::calculate(seed, m_LeafSet); } + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName("Hierarchical Results Level Set Memory Usage"); + core::memory_debug::dynamicSize("m_BucketElement", m_BucketElement, mem); + core::memory_debug::dynamicSize("m_InfluencerBucketSet", m_InfluencerBucketSet, mem); + core::memory_debug::dynamicSize("m_InfluencerSet", m_InfluencerSet, mem); + core::memory_debug::dynamicSize("m_PartitionSet", m_PartitionSet, mem); + core::memory_debug::dynamicSize("m_PersonSet", m_PersonSet, mem); + core::memory_debug::dynamicSize("m_LeafSet", m_LeafSet, mem); + } + + std::size_t memoryUsage() const { + std::size_t mem = core::memory::dynamicSize(m_BucketElement); + mem += core::memory::dynamicSize(m_InfluencerBucketSet); + mem += 
core::memory::dynamicSize(m_InfluencerSet); + mem += core::memory::dynamicSize(m_PartitionSet); + mem += core::memory::dynamicSize(m_PersonSet); + mem += core::memory::dynamicSize(m_LeafSet); + return mem; + } + private: //! Get an element of \p set by name. static const T* element(const TWordTypePrVec& set, const std::string& name) { @@ -299,6 +324,8 @@ class CHierarchicalResultsLevelSet : public CHierarchicalResultsVisitor { //! The container for leaves comprising distinct named //! (partition, person) field name pairs. TWordTypePrVec m_LeafSet; + + friend struct CHierarchicalResultsLevelSetTest::testMemoryUsage; }; template diff --git a/include/model/CHierarchicalResultsNormalizer.h b/include/model/CHierarchicalResultsNormalizer.h index b4fc0d1789..4bd249df05 100644 --- a/include/model/CHierarchicalResultsNormalizer.h +++ b/include/model/CHierarchicalResultsNormalizer.h @@ -12,15 +12,16 @@ #ifndef INCLUDED_ml_model_CHierarchicalResultsNormalizer_h #define INCLUDED_ml_model_CHierarchicalResultsNormalizer_h +#include #include #include #include +#include +#include #include -#include #include -#include #include namespace ml { @@ -44,6 +45,10 @@ struct MODEL_EXPORT SNormalizer { //! Compute a checksum for this object. uint64_t checksum() const; + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const; + + std::size_t memoryUsage() const; + std::string s_Description; TNormalizerPtr s_Normalizer; }; @@ -84,6 +89,7 @@ struct MODEL_EXPORT SNormalizer { //! normalizers is negligible. class MODEL_EXPORT CHierarchicalResultsNormalizer : public CHierarchicalResultsLevelSet, + public CMonitoredResource, private core::CNonCopyable { public: using TBase = CHierarchicalResultsLevelSet; @@ -106,9 +112,10 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer enum ERestoreOutcome { E_Ok = 0, E_Corrupt = 1, E_Incomplete = 2 }; public: - CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig); + CHierarchicalResultsNormalizer(CLimits& limits, + const CAnomalyDetectorModelConfig& modelConfig); - ~CHierarchicalResultsNormalizer() override = default; + ~CHierarchicalResultsNormalizer() override; //! Add a job for the subsequent invocations of the normalizer. void setJob(EJob job); @@ -167,6 +174,19 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer const std::string& functionName, const std::string& valueFieldName) const; + //! Get the memory used by this hierarchical results normalizer. + void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const override; + + //! Return the total memory usage. + std::size_t memoryUsage() const override; + + //! Get the static size of this object. + std::size_t staticSize() const override; + + //! Update the overall model size stats with information from the + //! hierarchical results normalizer. + void updateModelSizeStats(CResourceMonitor::SModelSizeStats& modelSizeStats) const override; + private: //! \brief Creates new normalizer instances. class CNormalizerFactory { @@ -210,15 +230,18 @@ class MODEL_EXPORT CHierarchicalResultsNormalizer static std::string leafCue(const TWord& word); private: + //! Configurable limits + CLimits& m_Limits; + //! The jobs that the normalizer will perform when invoked //! can be: update, normalize or update + normalize. - EJob m_Job; + EJob m_Job{E_NoOp}; //! The model configuration file. const CAnomalyDetectorModelConfig& m_ModelConfig; //! Whether the last update of the quantiles has caused a big change. 
- bool m_HasLastUpdateCausedBigChange; + bool m_HasLastUpdateCausedBigChange{false}; }; } } diff --git a/include/model/CPopulationModel.h b/include/model/CPopulationModel.h index 347f5509bd..979fbc35e0 100644 --- a/include/model/CPopulationModel.h +++ b/include/model/CPopulationModel.h @@ -23,8 +23,6 @@ #include #include -#include -#include #include #include diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 1c6375691d..5c7583888b 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -177,6 +177,9 @@ class MODEL_EXPORT CResourceMonitor { //! by calling this once per bucket processed until the initially requested memory limit is reached. void decreaseMargin(core_t::TTime elapsedTime); + //! Returns the sum of used memory plus any extra memory + std::size_t totalMemory() const; + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; @@ -218,9 +221,6 @@ class MODEL_EXPORT CResourceMonitor { //! Get the low memory limit with margin applied. std::size_t lowLimit() const; - //! Returns the sum of used memory plus any extra memory - std::size_t totalMemory() const; - //! Adjusts the amount of memory reported to take into //! account the current value of the byte limit margin and the effects //! of background persistence. diff --git a/include/model/CRuleCondition.h b/include/model/CRuleCondition.h index db675760e1..d7b3937e4b 100644 --- a/include/model/CRuleCondition.h +++ b/include/model/CRuleCondition.h @@ -73,6 +73,8 @@ class MODEL_EXPORT CRuleCondition { std::size_t cid, core_t::TTime time) const; + std::uint64_t checksum() const; + private: bool testValue(double value) const; std::string print(ERuleConditionAppliesTo appliesTo) const; diff --git a/include/model/CRuleScope.h b/include/model/CRuleScope.h index 8cf5444c81..0ea0a47506 100644 --- a/include/model/CRuleScope.h +++ b/include/model/CRuleScope.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -58,6 +59,8 @@ class MODEL_EXPORT CRuleScope { //! Pretty-print the scope. std::string print() const; + std::uint64_t checksum() const; + private: //! A vector that holds the triple of the field, filter and its type. TStrPatternSetCRefFilterTypeTrVec m_Scope; diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 82757df65e..da08829421 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -146,7 +146,7 @@ CAnomalyJob::CAnomalyJob(const std::string& jobId, m_MaxDetectors{std::numeric_limits::max()}, m_PersistenceManager{persistenceManager}, m_MaxQuantileInterval{maxQuantileInterval}, m_LastNormalizerPersistTime{core::CTimeUtils::now()}, m_LatestRecordTime{0}, - m_LastResultsTime{0}, m_Aggregator{modelConfig}, m_Normalizer{modelConfig} { + m_LastResultsTime{0}, m_Aggregator{modelConfig}, m_Normalizer{limits, modelConfig} { m_JsonOutputWriter.limitNumberRecords(maxAnomalyRecords); m_Limits.resourceMonitor().memoryUsageReporter(std::bind( @@ -474,8 +474,6 @@ void CAnomalyJob::updateConfig(const std::string& config) { if (configUpdater.update(config) == false) { LOG_ERROR(<< "Failed to update configuration"); } - const std::string& analysisConfig = m_JobConfig.analysisConfig().getAnalysisConfig(); - m_JobConfig.analysisConfig().reparseDetectorsFromStoredConfig(analysisConfig); } void CAnomalyJob::advanceTime(const std::string& time_) { @@ -1651,6 +1649,9 @@ void CAnomalyJob::pruneAllModels(std::size_t buckets) { (buckets == 0) ? 
detector->pruneModels() : detector->pruneModels(buckets); } } +const model::CHierarchicalResultsNormalizer& CAnomalyJob::normalizer() const { + return m_Normalizer; +} CAnomalyJob::TAnomalyDetectorPtr CAnomalyJob::makeDetector(const model::CAnomalyDetectorModelConfig& modelConfig, diff --git a/lib/api/CAnomalyJobConfig.cc b/lib/api/CAnomalyJobConfig.cc index 9d05bc53c6..41cd8791d5 100644 --- a/lib/api/CAnomalyJobConfig.cc +++ b/lib/api/CAnomalyJobConfig.cc @@ -589,7 +589,6 @@ bool CAnomalyJobConfig::parse(const std::string& jsonStr) { auto analysisConfig = parameters[ANALYSIS_CONFIG].jsonObject(); if (analysisConfig != nullptr) { - m_AnalysisConfig.setConfig(toString(*analysisConfig)); m_AnalysisConfig.parse(*analysisConfig); } @@ -724,27 +723,6 @@ void CAnomalyJobConfig::CAnalysisConfig::parseDetectorsConfig(const json::value& } } -const std::string& CAnomalyJobConfig::CAnalysisConfig::getAnalysisConfig() { - return m_AnalysisConfigString; -} - -bool CAnomalyJobConfig::CAnalysisConfig::reparseDetectorsFromStoredConfig(const std::string& analysisConfig) { - json::value doc; - bool ok = core::CBoostJsonParser::parse(analysisConfig, doc); - if (ok == false) { - LOG_ERROR(<< "An error occurred while parsing anomaly job config from JSON: \"" - << analysisConfig << "\""); - return false; - } - - auto parameters = ANALYSIS_CONFIG_READER.read(doc); - auto detectorsConfig = parameters[DETECTORS].jsonObject(); - if (detectorsConfig != nullptr) { - this->parseDetectorsConfig(*detectorsConfig); - } - return true; -} - void CAnomalyJobConfig::CAnalysisConfig::parse(const json::value& analysisConfig) { auto parameters = ANALYSIS_CONFIG_READER.read(analysisConfig); // We choose to ignore any errors here parsing the time duration string as diff --git a/lib/api/CConfigUpdater.cc b/lib/api/CConfigUpdater.cc index 17cb847572..dc3ae7e288 100644 --- a/lib/api/CConfigUpdater.cc +++ b/lib/api/CConfigUpdater.cc @@ -41,41 +41,53 @@ bool CConfigUpdater::update(const std::string& json) { } json::object obj = doc.as_object(); + for (const auto& kv : obj) { + if (kv.key() == CAnomalyJobConfig::MODEL_PLOT_CONFIG) { + LOG_TRACE(<< "Updating model plot config"); - if (obj.contains(CAnomalyJobConfig::MODEL_PLOT_CONFIG)) { - if (obj[CAnomalyJobConfig::MODEL_PLOT_CONFIG].is_object() == false) { - LOG_ERROR(<< "Input error: expected " << CAnomalyJobConfig::MODEL_PLOT_CONFIG - << " to be JSON object but input was '" << json - << "'. Please report this problem."); - return false; - } - const json::value& value = obj[CAnomalyJobConfig::MODEL_PLOT_CONFIG]; + if (kv.value().is_object() == false) { + LOG_ERROR(<< "Input error: expected " << CAnomalyJobConfig::MODEL_PLOT_CONFIG + << " to be JSON object but input was '" << json + << "'. 
Please report this problem."); + return false; + } - m_JobConfig.modelPlotConfig().parse(value); - const ml::api::CAnomalyJobConfig::CModelPlotConfig& modelPlotConfig = - m_JobConfig.modelPlotConfig(); - m_ModelConfig.configureModelPlot(modelPlotConfig.enabled(), - modelPlotConfig.annotationsEnabled(), - modelPlotConfig.terms()); - } else if (obj.contains(CAnomalyJobConfig::FILTERS)) { - if (m_JobConfig.parseFilterConfig(json) == false) { - LOG_ERROR(<< "Failed to parse filter config update: " << json); - return false; - } - m_JobConfig.initRuleFilters(); - } else if (obj.contains(CAnomalyJobConfig::EVENTS)) { - if (m_JobConfig.parseEventConfig(json) == false) { - LOG_ERROR(<< "Failed to parse events config update: " << json); + m_JobConfig.modelPlotConfig().parse(kv.value()); + const ml::api::CAnomalyJobConfig::CModelPlotConfig& modelPlotConfig = + m_JobConfig.modelPlotConfig(); + m_ModelConfig.configureModelPlot(modelPlotConfig.enabled(), + modelPlotConfig.annotationsEnabled(), + modelPlotConfig.terms()); + } else if (kv.key() == CAnomalyJobConfig::FILTERS) { + LOG_TRACE(<< "Updating filters config"); + + if (m_JobConfig.parseFilterConfig(json) == false) { + LOG_ERROR(<< "Failed to parse filter config update: " << json); + return false; + } + LOG_TRACE(<< "Calling m_JobConfig.initRuleFilters"); + + m_JobConfig.initRuleFilters(); + + LOG_TRACE(<< "Done calling m_JobConfig.initRuleFilters"); + + } else if (kv.key() == CAnomalyJobConfig::EVENTS) { + LOG_TRACE(<< "Updating events config"); + + if (m_JobConfig.parseEventConfig(json) == false) { + LOG_ERROR(<< "Failed to parse events config update: " << json); + return false; + } + m_JobConfig.initScheduledEvents(); + } else if (kv.key() == CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES) { + LOG_TRACE(<< "Updating detector rules config"); + return m_JobConfig.analysisConfig().parseRulesUpdate(kv.value()); + } else { + LOG_ERROR(<< "Unexpected JSON update message: " << json); return false; } - m_JobConfig.initScheduledEvents(); - } else if (obj.contains(CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES)) { - return m_JobConfig.analysisConfig().parseRulesUpdate( - obj[CAnomalyJobConfig::CAnalysisConfig::CDetectorConfig::DETECTOR_RULES]); - } else { - LOG_ERROR(<< "Unexpected JSON update message: " << json); - return false; } + return true; } } diff --git a/lib/api/CResultNormalizer.cc b/lib/api/CResultNormalizer.cc index 61dbae43cb..1df095d0d8 100644 --- a/lib/api/CResultNormalizer.cc +++ b/lib/api/CResultNormalizer.cc @@ -37,11 +37,12 @@ const std::string CResultNormalizer::INFLUENCER_LEVEL("infl"); const std::string CResultNormalizer::ZERO("0"); CResultNormalizer::CResultNormalizer(const model::CAnomalyDetectorModelConfig& modelConfig, - CSimpleOutputWriter& outputWriter) + CSimpleOutputWriter& outputWriter, + model::CLimits& limits) : m_ModelConfig(modelConfig), m_OutputWriter(outputWriter), m_WriteFieldNames(true), m_OutputFieldNormalizedScore(m_OutputFields[NORMALIZED_SCORE_NAME]), - m_Normalizer(m_ModelConfig) { + m_Normalizer(limits, m_ModelConfig) { } bool CResultNormalizer::initNormalizer(const std::string& stateFileName) { diff --git a/lib/api/unittest/CAnomalyJobConfigTest.cc b/lib/api/unittest/CAnomalyJobConfigTest.cc index 3699baa6f4..78b8fdcb07 100644 --- a/lib/api/unittest/CAnomalyJobConfigTest.cc +++ b/lib/api/unittest/CAnomalyJobConfigTest.cc @@ -57,51 +57,6 @@ BOOST_AUTO_TEST_CASE(testIntervalStagger) { BOOST_REQUIRE_EQUAL(job3Config.intervalStagger(), job1Config.intervalStagger()); } 
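
The CConfigUpdater rewrite above replaces the contains() probes with a single pass over the update document's top-level keys, so one message can carry several sections and any unknown key fails the update. A minimal Boost.JSON sketch of that dispatch pattern (the key set and handler bodies are placeholders, not the ml-cpp logic):

    #include <boost/json.hpp>
    #include <iostream>
    #include <string>

    namespace json = boost::json;

    // One pass over the update document: each top-level key selects a handler,
    // and an unexpected key rejects the whole update.
    bool applyUpdate(const std::string& text) {
        boost::system::error_code ec;
        json::value doc = json::parse(text, ec);
        if (ec || doc.is_object() == false) {
            std::cerr << "Invalid update document: " << text << '\n';
            return false;
        }
        for (const auto& kv : doc.as_object()) {
            if (kv.key() == "model_plot_config") {
                std::cout << "model plot update: " << kv.value() << '\n';
            } else if (kv.key() == "filters") {
                std::cout << "filters update: " << kv.value() << '\n';
            } else {
                std::cerr << "Unexpected key: " << kv.key() << '\n';
                return false;
            }
        }
        return true;
    }

    int main() {
        return applyUpdate(R"({"model_plot_config":{"enabled":true}})") ? 0 : 1;
    }

Boost.JSON objects preserve insertion order, so the sections are applied in the order they appear in the message, which the contains()-based version could not guarantee.
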
-BOOST_AUTO_TEST_CASE(testReparseDetectorsFromStoredConfig) { - const std::string validAnomalyJobConfigWithCustomRuleFilter{ - "{\"job_id\":\"mean_bytes_by_clientip\",\"job_type\":\"anomaly_detector\",\"job_version\":\"8.0.0\",\"create_time\":1604671135245,\"description\":\"mean bytes by clientip\"," - "\"analysis_config\":{\"bucket_span\":\"3h\",\"detectors\":[{\"detector_description\":\"mean(bytes) by clientip\",\"function\":\"mean\",\"field_name\":\"bytes\",\"by_field_name\":\"clientip\"," - "\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"clientip\":{\"filter_id\":\"safe_ips\",\"filter_type\":\"include\"}},\"conditions\":[{\"applies_to\":\"actual\",\"operator\":\"lt\",\"value\":10.0}]}]," - "\"detector_index\":0}],\"influencers\":[\"clientip\"]},\"analysis_limits\":{\"model_memory_limit\":\"42mb\",\"categorization_examples_limit\":4}," - "\"data_description\":{\"time_field\":\"timestamp\",\"time_format\":\"epoch_ms\"},\"model_plot_config\":{\"enabled\":false,\"annotations_enabled\":false}," - "\"model_snapshot_retention_days\":10,\"daily_model_snapshot_retention_after_days\":1,\"results_index_name\":\"shared\",\"allow_lazy_open\":false}"}; - - // Expect parsing to succeed if the filter referenced by the custom rule can be found in the filter map. - const std::string filterConfigJson{"{\"filters\":[{\"filter_id\":\"safe_ips\",\"items\":[]}]}"}; - ml::api::CAnomalyJobConfig jobConfig; - BOOST_TEST_REQUIRE(jobConfig.parseFilterConfig(filterConfigJson)); - - const std::string validScheduledEventsConfigJson{"{\"events\":[" - "]}"}; - - BOOST_TEST_REQUIRE(jobConfig.parseEventConfig(validScheduledEventsConfigJson)); - - jobConfig.analysisConfig().init(jobConfig.ruleFilters(), jobConfig.scheduledEvents()); - - BOOST_REQUIRE_MESSAGE(jobConfig.parse(validAnomalyJobConfigWithCustomRuleFilter), - "Cannot parse JSON job config!"); - BOOST_TEST_REQUIRE(jobConfig.isInitialized()); - - // Expect parsing to fail if the analysis config JSON string is invalid - const std::string inValidAnalysisConfigString{"{\"bucket_span\":\"1h\""}; - BOOST_TEST_REQUIRE(!jobConfig.analysisConfig().reparseDetectorsFromStoredConfig( - inValidAnalysisConfigString)); - - // Expect parsing to fail if the filter referenced by the custom rule cannot be found - const std::string validAnalysisConfigStringWithUnknownFilter{ - "{\"bucket_span\":\"1h\",\"detectors\":[{\"detector_description\":\"count over ip\",\"function\":\"count\",\"over_field_name\":\"ip\",\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"ip\":{\"filter_id\":\"unknown_filter\",\"filter_type\":\"include\"}}}],\"detector_index\":0}],\"influencers\":[],\"model_prune_window\":\"30d\"}"}; - BOOST_REQUIRE_EXCEPTION( - jobConfig.analysisConfig().reparseDetectorsFromStoredConfig(validAnalysisConfigStringWithUnknownFilter), - ml::api::CAnomalyJobConfigReader::CParseError, - [](ml::api::CAnomalyJobConfigReader::CParseError const&) { return true; }); - - // Expect parsing to succeed if the filter referenced by the custom rule is registered. 
- const std::string validAnalysisConfigString{ - "{\"bucket_span\":\"1h\",\"detectors\":[{\"detector_description\":\"count over ip\",\"function\":\"count\",\"over_field_name\":\"ip\",\"custom_rules\":[{\"actions\":[\"skip_result\"],\"scope\":{\"ip\":{\"filter_id\":\"safe_ips\",\"filter_type\":\"include\"}}}],\"detector_index\":0}],\"influencers\":[],\"model_prune_window\":\"30d\"}"}; - BOOST_TEST_REQUIRE(jobConfig.analysisConfig().reparseDetectorsFromStoredConfig( - validAnalysisConfigString)); -} - BOOST_AUTO_TEST_CASE(testParse) { using TAnalysisConfig = ml::api::CAnomalyJobConfig::CAnalysisConfig; diff --git a/lib/api/unittest/CAnomalyJobTest.cc b/lib/api/unittest/CAnomalyJobTest.cc index 02d5925483..d5384327ef 100644 --- a/lib/api/unittest/CAnomalyJobTest.cc +++ b/lib/api/unittest/CAnomalyJobTest.cc @@ -36,6 +36,7 @@ #include #include #include +#include #include BOOST_TEST_DONT_PRINT_LOG_VALUE(json::array::const_iterator) @@ -186,6 +187,9 @@ bool findLine(const std::string& regex, const ml::core::CRegex::TStrVec& lines) } const ml::core_t::TTime BUCKET_SIZE(3600); + +using TStrStrPr = std::pair; +using TStrStrPrVec = std::vector; } using namespace ml; @@ -851,6 +855,186 @@ BOOST_AUTO_TEST_CASE(testRestoreFailsWithEmptyStream) { BOOST_TEST_REQUIRE(job.restoreState(restoreSearcher, completeToTime) == false); } +BOOST_AUTO_TEST_CASE(testConfigUpdate) { + // This, in part, is essentially replicating the DetectionRulesIT/testScope Java REST test. + // It proves useful to have the test here too, as it provides an entrypoint for investigating + // any issues related to filters, especially when updating them when already referenced by anomaly detector models. + // We simply want to see the job run to completion. + ml::api::CAnomalyJobConfig jobConfig; + BOOST_REQUIRE_EQUAL(true, jobConfig.initFromFiles("testfiles/count_over_ip_config.json", + "testfiles/filterConfig.json", + "testfiles/eventConfig.json")); + + const ml::api::CAnomalyJobConfig::CAnalysisConfig& analysisConfig = + jobConfig.analysisConfig(); + + model::CLimits limits; + + model::CAnomalyDetectorModelConfig modelConfig = analysisConfig.makeModelConfig(); + std::stringstream outputStrm; + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + + CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); + + auto generateRandomAlpha = [](int strLen) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, 25); + + std::string str; + for (int i = 0; i < strLen; ++i) { + str += char('a' + dis(gen)); + } + return str; + }; + + long timestamp = 1509062400000L; + TStrStrPrVec data; + + for (int bucket = 0; bucket < 20; bucket++) { + for (int i = 0; i < 5; i++) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), + generateRandomAlpha(10)); + } + timestamp += 3600 * 1000; + } + + // Now send anomalous counts for our filtered IPs plus 333.333.333.333 + auto namedIps = std::vector{"111.111.111.111", "222.222.222.222", "333.333.333.333"}; + for (int i = 0; i < 10; i++) { + for (auto& ip : namedIps) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), ip); + } + } + + for (int bucket = 0; bucket < 3; bucket++) { + for (int i = 0; i < 5; i++) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), + generateRandomAlpha(10)); + } + timestamp += 3600 * 1000; + } + + CTestAnomalyJob::TStrStrUMap dataRows; + + for (const auto & [ time, ip ] : data) { + dataRows["time"] = time; + dataRows["ip"] = ip; + 
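+        // handleRecord feeds a single input record to the job; the job
+        // finalises a bucket's results once records arrive with "time"
+        // values beyond that bucket's span.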
BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + } + + BOOST_REQUIRE_EQUAL(145, job.numRecordsHandled()); + + const std::string& detectorConfig1{R"( + { + "filters":[{"filter_id":"safe_ips", "items":["111.111.111.111","222.222.222.222"]}], + "events":[{"description":"event_1", "rules":[{"actions":["skip_result","skip_model_update"],"conditions":[{"applies_to":"time","operator":"gte","value": 1.0},{"applies_to":"time","operator":"lt","value": 2.0}]}]}], + "model_plot_config":{"enabled":true,"annotations_enabled":false}, + "detector_rules":{"detector_index":0,"custom_rules":[{"actions":["skip_result"],"conditions":[{"applies_to":"actual","operator":"gte","value":15.0},{"applies_to":"actual","operator":"lte","value":30.0}]}]} + } + )"}; + + job.updateConfig(detectorConfig1); + + BOOST_REQUIRE_EQUAL(1, jobConfig.analysisConfig().detectionRules().size()); + auto itr = jobConfig.analysisConfig().detectionRules().find(0); + BOOST_REQUIRE_EQUAL(1, itr->second.size()); + std::string rule{itr->second[0].print()}; + BOOST_REQUIRE_EQUAL( + std::string("SKIP_RESULT IF ACTUAL >= 15.000000 AND ACTUAL <= 30.000000"), rule); + + api::CAnomalyJobConfig::CModelPlotConfig& modelPlotConfig = jobConfig.modelPlotConfig(); + BOOST_REQUIRE_EQUAL(false, modelPlotConfig.annotationsEnabled()); + BOOST_REQUIRE_EQUAL(true, modelPlotConfig.enabled()); + + auto events = jobConfig.analysisConfig().scheduledEvents(); + BOOST_REQUIRE_EQUAL(1, events.size()); + BOOST_REQUIRE_EQUAL(std::string("event_1"), events[0].first); + BOOST_REQUIRE_EQUAL(std::string("SKIP_RESULT AND SKIP_MODEL_UPDATE IF TIME >= 1.000000 AND TIME < 2.000000"), + events[0].second.print()); + + auto ruleFilters = jobConfig.ruleFilters(); + BOOST_REQUIRE_EQUAL(1, ruleFilters.size()); + + BOOST_REQUIRE_EQUAL(true, ruleFilters["safe_ips"].contains("111.111.111.111")); + BOOST_REQUIRE_EQUAL(true, ruleFilters["safe_ips"].contains("222.222.222.222")); + BOOST_REQUIRE_EQUAL(false, ruleFilters["safe_ips"].contains("333.333.333.333")); + + const std::string& detectorConfig2{R"( + { + "filters":[{"filter_id":"safe_ips", "items":["333.333.333.333"]}], + "events":[{"description":"event_1", "rules":[{"actions":["skip_result","skip_model_update"],"conditions":[{"applies_to":"time","operator":"gte","value": 2.0},{"applies_to":"time","operator":"lt","value": 3.0}]}]}], + "model_plot_config":{"enabled":false,"annotations_enabled":true}, + "detector_rules":{"detector_index":0,"custom_rules":[{"actions":["skip_result"],"conditions":[{"applies_to":"typical","operator":"gte","value":10.0},{"applies_to":"typical","operator":"lte","value":50.0}]}]} + })"}; + + job.updateConfig(detectorConfig2); + + data.clear(); + // Send another anomalous bucket + for (int i = 0; i < 10; i++) { + for (auto& ip : namedIps) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), ip); + } + } + + // Some more normal buckets + for (int bucket = 0; bucket < 3; bucket++) { + for (int i = 0; i < 5; i++) { + data.emplace_back(core::CStringUtils::typeToString(timestamp), + generateRandomAlpha(10)); + } + timestamp += 3600 * 1000; + } + + dataRows.clear(); + for (const auto & [ time, ip ] : data) { + dataRows["time"] = time; + dataRows["ip"] = ip; + BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + } + + BOOST_REQUIRE_EQUAL(190, job.numRecordsHandled()); + + BOOST_REQUIRE_EQUAL(1, jobConfig.analysisConfig().detectionRules().size()); + itr = jobConfig.analysisConfig().detectionRules().find(0); + BOOST_REQUIRE_EQUAL(1, itr->second.size()); + rule = itr->second[0].print(); + 
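+    // detectorConfig2 above replaced the ACTUAL-based conditions from
+    // detectorConfig1 with TYPICAL-based ones, so the reprinted rule
+    // should now reflect the updated bounds.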
BOOST_REQUIRE_EQUAL( + std::string("SKIP_RESULT IF TYPICAL >= 10.000000 AND TYPICAL <= 50.000000"), rule); + + modelPlotConfig = jobConfig.modelPlotConfig(); + BOOST_REQUIRE_EQUAL(true, modelPlotConfig.annotationsEnabled()); + BOOST_REQUIRE_EQUAL(false, modelPlotConfig.enabled()); + + events = jobConfig.analysisConfig().scheduledEvents(); + BOOST_REQUIRE_EQUAL(1, events.size()); + BOOST_REQUIRE_EQUAL(std::string("event_1"), events[0].first); + BOOST_REQUIRE_EQUAL(std::string("SKIP_RESULT AND SKIP_MODEL_UPDATE IF TIME >= 2.000000 AND TIME < 3.000000"), + events[0].second.print()); + + ruleFilters = jobConfig.ruleFilters(); + BOOST_REQUIRE_EQUAL(1, ruleFilters.size()); + + BOOST_REQUIRE_EQUAL(false, ruleFilters["safe_ips"].contains("111.111.111.111")); + BOOST_REQUIRE_EQUAL(false, ruleFilters["safe_ips"].contains("222.222.222.222")); + BOOST_REQUIRE_EQUAL(true, ruleFilters["safe_ips"].contains("333.333.333.333")); + + job.finalise(); + wrappedOutputStream.syncFlush(); + + std::string output = outputStrm.str(); + LOG_TRACE(<< "Output has yielded: " << output); + + // check that the quantile state has actually been persisted + core::CRegex regex; + regex.init("\n"); + core::CRegex::TStrVec lines; + regex.split(output, lines); + BOOST_REQUIRE_EQUAL( + true, findLine("\"quantiles\":{\"job_id\":\"job\",\"quantile_state\".*", lines)); +} + BOOST_AUTO_TEST_CASE(testParsePersistControlMessageArgs) { { const ml::core_t::TTime expectedSnapshotTimestamp{1283524206}; @@ -999,4 +1183,26 @@ BOOST_AUTO_TEST_CASE(testRestoreFromBadState) { } } +BOOST_AUTO_TEST_CASE(testHierarchicalResultsNormalizerShouldIncreaseMemoryUsage) { + model::CLimits limits; + auto jobConfig = CTestAnomalyJob::makeSimpleJobConfig("metric", "value", "", "", ""); + auto modelConfig = model::CAnomalyDetectorModelConfig::defaultConfig(BUCKET_SIZE); + std::stringstream outputStrm; + core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); + + CTestAnomalyJob job("job", limits, jobConfig, modelConfig, wrappedOutputStream); + CTestAnomalyJob::TStrStrUMap const dataRows = { + {"time", "12345678"}, {"value", "1.0"}, {"greenhouse", "rhubarb"}}; + + BOOST_TEST_REQUIRE(job.handleRecord(dataRows)); + auto resourceMonitor = limits.resourceMonitor(); + resourceMonitor.forceRefreshAll(); + BOOST_TEST_REQUIRE(job.mutableNormalizer().memoryUsage() > 0); + + // Unregister the normalizer and check that memory usage decreases + auto memoryUsageBeforeUnregister = resourceMonitor.totalMemory(); + resourceMonitor.unRegisterComponent(job.mutableNormalizer()); + resourceMonitor.forceRefreshAll(); + BOOST_TEST_REQUIRE(resourceMonitor.totalMemory() < memoryUsageBeforeUnregister); +} BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/api/unittest/CIoManagerTest.cc b/lib/api/unittest/CIoManagerTest.cc index fcbdd1f763..1c99ebb46e 100644 --- a/lib/api/unittest/CIoManagerTest.cc +++ b/lib/api/unittest/CIoManagerTest.cc @@ -29,7 +29,7 @@ BOOST_AUTO_TEST_SUITE(CIoManagerTest) namespace { const std::uint32_t SLEEP_TIME_MS{100}; -const std::uint32_t PAUSE_TIME_MS{10}; +const std::uint32_t PAUSE_TIME_MS{40}; const std::size_t MAX_ATTEMPTS{100}; const std::size_t TEST_SIZE{10000}; const char TEST_CHAR{'a'}; diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index a41d0dccf1..ba44163e7c 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -1010,6 +1011,7 @@ BOOST_AUTO_TEST_CASE(testGeoResultsWrite) 
{ std::string functionDescription("lat_long(location)"); ml::api::CHierarchicalResultsWriter::TOptionalStrOptionalStrPrDoublePrVec influences; std::string emptyString; + std::string mean_function("mean"); // The output writer won't close the JSON structures until is is destroyed { std::ostringstream sstream; @@ -1099,8 +1101,8 @@ BOOST_AUTO_TEST_CASE(testGeoResultsWrite) { ml::api::CHierarchicalResultsWriter::SResults result( ml::api::CHierarchicalResultsWriter::E_Result, partitionFieldName, partitionFieldValue, byFieldName, byFieldValue, - correlatedByFieldValue, 1, "mean", functionDescription, 2.24, - 79, typical, actual, 10.0, 10.0, 0.5, 0.0, fieldName, + correlatedByFieldValue, 1, mean_function, functionDescription, + 2.24, 79, typical, actual, 10.0, 10.0, 0.5, 0.0, fieldName, influences, false, true, 1, 1, EMPTY_STRING_LIST, {}); BOOST_TEST_REQUIRE(writer.acceptResult(result)); BOOST_TEST_REQUIRE(writer.endOutputBatch(false, 1U)); @@ -1687,7 +1689,8 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { ml::core::CJsonOutputStreamWrapper outputStream(sstream); ml::api::CJsonOutputWriter writer("job", outputStream); - ml::model::CHierarchicalResultsNormalizer normalizer(modelConfig); + ml::model::CLimits limits(false); + ml::model::CHierarchicalResultsNormalizer normalizer(limits, modelConfig); writer.persistNormalizer(normalizer, persistTime); writer.finalise(); } diff --git a/lib/api/unittest/CRestorePreviousStateTest.cc b/lib/api/unittest/CRestorePreviousStateTest.cc index 86bf30b2e2..7b06ec1e52 100644 --- a/lib/api/unittest/CRestorePreviousStateTest.cc +++ b/lib/api/unittest/CRestorePreviousStateTest.cc @@ -269,7 +269,8 @@ BOOST_FIXTURE_TEST_CASE(testRestoreNormalizer, ml::test::CProgramCounterClearing ml::model::CAnomalyDetectorModelConfig modelConfig = ml::model::CAnomalyDetectorModelConfig::defaultConfig(3600); ml::api::CCsvOutputWriter outputWriter; - ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); + ml::model::CLimits limits(false); + ml::api::CResultNormalizer normalizer(modelConfig, outputWriter, limits); BOOST_TEST_REQUIRE(normalizer.initNormalizer( "testfiles/state/" + version.s_Version + "/normalizer_state.json")); } diff --git a/lib/api/unittest/CResultNormalizerTest.cc b/lib/api/unittest/CResultNormalizerTest.cc index 316611364d..a3d08acf6d 100644 --- a/lib/api/unittest/CResultNormalizerTest.cc +++ b/lib/api/unittest/CResultNormalizerTest.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -31,7 +32,8 @@ BOOST_AUTO_TEST_CASE(testInitNormalizerPartitioned) { ml::api::CNdJsonOutputWriter outputWriter; - ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); + ml::model::CLimits limits(false); + ml::api::CResultNormalizer normalizer(modelConfig, outputWriter, limits); BOOST_TEST_REQUIRE(normalizer.initNormalizer("testfiles/new_quantilesState.json")); LOG_DEBUG(<< "normalizer initialized"); @@ -390,7 +392,8 @@ BOOST_AUTO_TEST_CASE(testInitNormalizer) { ml::api::CNdJsonOutputWriter outputWriter; - ml::api::CResultNormalizer normalizer(modelConfig, outputWriter); + ml::model::CLimits limits(false); + ml::api::CResultNormalizer normalizer(modelConfig, outputWriter, limits); BOOST_TEST_REQUIRE(normalizer.initNormalizer("testfiles/quantilesState.json")); diff --git a/lib/api/unittest/CTestAnomalyJob.cc b/lib/api/unittest/CTestAnomalyJob.cc index 89e139fc3e..5a3f678932 100644 --- a/lib/api/unittest/CTestAnomalyJob.cc +++ b/lib/api/unittest/CTestAnomalyJob.cc @@ -50,3 +50,25 @@ CTestAnomalyJob::makeSimpleJobConfig(const std::string& 
functionName, influencers, summaryCountFieldName); return jobConfig; } + +ml::api::CAnomalyJobConfig CTestAnomalyJob::makeJobConfig(const std::string& detectorsConfig) { + json::parser p; + boost::system::error_code ec; + p.write_some(detectorsConfig, ec); + if (ec) { + LOG_ERROR(<< "An error occurred while parsing JSON: " << ec.message()); + return {}; + } + json::value doc = p.release(); + if (doc.is_object() == false) { + LOG_ERROR(<< "Input error: expected JSON object but input was '" + << detectorsConfig << "'. Please report this problem."); + return {}; + } + + json::object obj = doc.as_object(); + + ml::api::CAnomalyJobConfig jobConfig; + jobConfig.analysisConfig().parseDetectorsConfig(obj); + return jobConfig; +} \ No newline at end of file diff --git a/lib/api/unittest/CTestAnomalyJob.h b/lib/api/unittest/CTestAnomalyJob.h index c794d7e17c..a643420f38 100644 --- a/lib/api/unittest/CTestAnomalyJob.h +++ b/lib/api/unittest/CTestAnomalyJob.h @@ -55,6 +55,12 @@ class CTestAnomalyJob : public ml::api::CAnomalyJob { const std::string& partitionFieldName, const TStrVec& influencers = {}, const std::string& summaryCountFieldName = ""); + + static ml::api::CAnomalyJobConfig makeJobConfig(const std::string& detectorsConfig); + + ml::model::CHierarchicalResultsNormalizer& mutableNormalizer() const { + return const_cast(this->normalizer()); + } }; #endif // INCLUDED_CTestAnomalyJob_h diff --git a/lib/api/unittest/testfiles/count_over_ip_config.json b/lib/api/unittest/testfiles/count_over_ip_config.json new file mode 100644 index 0000000000..170afb1925 --- /dev/null +++ b/lib/api/unittest/testfiles/count_over_ip_config.json @@ -0,0 +1,44 @@ +{ + "job_id": "detection-rules-it-test-scope", + "job_type": "anomaly_detector", + "job_version": "12.0.0", + "create_time": 1739482196563, + "analysis_config": { + "bucket_span": "1h", + "detectors": [ + { + "detector_description": "count over ip", + "function": "count", + "over_field_name": "ip", + "custom_rules": [ + { + "actions": [ + "skip_result" + ], + "scope": { + "ip": { + "filter_id": "safe_ips", + "filter_type": "include" + } + } + } + ], + "detector_index": 0 + } + ], + "influencers": [], + "model_prune_window": "30d" + }, + "analysis_limits": { + "model_memory_limit": "1024mb", + "categorization_examples_limit": 4 + }, + "data_description": { + "time_field": "time", + "time_format": "epoch_ms" + }, + "model_snapshot_retention_days": 10, + "daily_model_snapshot_retention_after_days": 1, + "results_index_name": "shared", + "allow_lazy_open": false +} diff --git a/lib/api/unittest/testfiles/eventConfig.json b/lib/api/unittest/testfiles/eventConfig.json new file mode 100644 index 0000000000..c14d27bb64 --- /dev/null +++ b/lib/api/unittest/testfiles/eventConfig.json @@ -0,0 +1,4 @@ +{ + "events": [ + ] +} diff --git a/lib/api/unittest/testfiles/filterConfig.json b/lib/api/unittest/testfiles/filterConfig.json new file mode 100644 index 0000000000..682f3d451d --- /dev/null +++ b/lib/api/unittest/testfiles/filterConfig.json @@ -0,0 +1,11 @@ +{ + "filters": [ + { + "filter_id": "safe_ips", + "items": [ + "111.111.111.111", + "222.222.222.222" + ] + } + ] +} diff --git a/lib/core/CDetachedProcessSpawner.cc b/lib/core/CDetachedProcessSpawner.cc index d01031c64c..c7f9b1186e 100644 --- a/lib/core/CDetachedProcessSpawner.cc +++ b/lib/core/CDetachedProcessSpawner.cc @@ -185,13 +185,22 @@ class CTrackerThread : public CThread { // at a lower level LOG_INFO(<< "Child process with PID " << pid << " was terminated by signal " << signal); - } else { + } else if 
(signal == SIGKILL) {
                 // This should never happen if the system is working
                 // normally - possible reasons are the Linux OOM
-                // killer, manual intervention and bugs that cause
-                // access violations
+                // killer or manual intervention. The latter is highly unlikely
+                // if running in the cloud.
+                LOG_ERROR(<< "Child process with PID " << pid << " was terminated by signal 9 (SIGKILL)."
+                          << " This is likely due to the OOM killer."
+                          << " Please check system logs for more details.");
+            } else {
+                // This should never happen if the system is working
+                // normally - possible reasons are bugs that cause
+                // access violations or manual intervention. The latter is highly unlikely
+                // if running in the cloud.
                 LOG_ERROR(<< "Child process with PID " << pid
-                          << " was terminated by signal " << signal);
+                          << " was terminated by signal " << signal
+                          << ". Please check system logs for more details.");
             }
         } else {
             int exitCode = WEXITSTATUS(status);
diff --git a/lib/core/CFlatPrefixTree.cc b/lib/core/CFlatPrefixTree.cc
index 56f6dbdcfc..f36489346e 100644
--- a/lib/core/CFlatPrefixTree.cc
+++ b/lib/core/CFlatPrefixTree.cc
@@ -11,6 +11,7 @@
 #include
+#include <core/CHashing.h>
 #include
 #include
 #include
@@ -226,5 +227,24 @@ std::string CFlatPrefixTree::print() const {
     result += "]";
     return result;
 }
+
+std::uint64_t CFlatPrefixTree::SNode::checksum() const {
+    std::uint64_t result{0};
+    result = CHashing::hashCombine(result, static_cast<std::uint64_t>(s_Char));
+    result = CHashing::hashCombine(result, static_cast<std::uint64_t>(s_Type));
+    result = CHashing::hashCombine(result, static_cast<std::uint64_t>(s_Next));
+    return result;
+}
+
+std::uint64_t CFlatPrefixTree::checksum() const {
+    std::uint64_t result{0};
+
+    // Iterate over m_FlatTree
+    for (const auto& node : m_FlatTree) {
+        result = CHashing::hashCombine(result, node.checksum());
+    }
+
+    return result;
+}
 }
 }
diff --git a/lib/core/CPatternSet.cc b/lib/core/CPatternSet.cc
index 393d6796f8..c4cf3b6275 100644
--- a/lib/core/CPatternSet.cc
+++ b/lib/core/CPatternSet.cc
@@ -12,6 +12,7 @@
 #include
 #include
+#include <core/CHashing.h>
 #include
 #include
@@ -153,5 +154,16 @@ void CPatternSet::clear() {
     m_SuffixPatterns.clear();
     m_ContainsPatterns.clear();
 }
+
+std::uint64_t CPatternSet::checksum() const {
+    std::uint64_t result{0};
+
+    result = CHashing::hashCombine(result, m_FullMatchPatterns.checksum());
+    result = CHashing::hashCombine(result, m_PrefixPatterns.checksum());
+    result = CHashing::hashCombine(result, m_SuffixPatterns.checksum());
+    result = CHashing::hashCombine(result, m_ContainsPatterns.checksum());
+
+    return result;
+}
 }
 }
diff --git a/lib/core/unittest/CMemoryUsageTest.cc b/lib/core/unittest/CMemoryUsageTest.cc
index 91fd00c76c..3fc0cc8c69 100644
--- a/lib/core/unittest/CMemoryUsageTest.cc
+++ b/lib/core/unittest/CMemoryUsageTest.cc
@@ -1287,12 +1287,12 @@ BOOST_AUTO_TEST_CASE(testSmallVector) {
     BOOST_REQUIRE_EQUAL(0, extraMem);
     growShrink.push_back(1.7);
     extraMem = core::memory::dynamicSize(growShrink);
-    // Interesting (shocking?) result: once a boost::small_vector has switched
-    // off of internal storage it will NEVER go back to internal storage.
-    // Arguably this is a bug, and this assertion might start failing after a
-    // Boost upgrade. If that happens and changing it to assert extraMem is 0
-    // fixes it then this means boost::small_vector has been improved.
-    BOOST_TEST_REQUIRE(extraMem > 0);
+    // Interestingly, we used to assert extraMem > 0 here, as it used to be the case
+    // that once a boost::small_vector had switched
+    // off of internal storage it would NEVER go back to internal storage.
+ // Arguably that was a bug, and this assertion started failing after + // upgrading Boost to 1.86.0, meaning that boost::small_vector has been improved. + BOOST_TEST_REQUIRE(extraMem >= 0); // Change to `==` once upgraded to Boost 1.86 on all platforms } BOOST_AUTO_TEST_CASE(testAlignedVector) { diff --git a/lib/maths/analytics/unittest/CBoostedTreeTest.cc b/lib/maths/analytics/unittest/CBoostedTreeTest.cc index 3161800b97..dbed0b6653 100644 --- a/lib/maths/analytics/unittest/CBoostedTreeTest.cc +++ b/lib/maths/analytics/unittest/CBoostedTreeTest.cc @@ -488,8 +488,8 @@ BOOST_AUTO_TEST_CASE(testEdgeCases) { auto frame = core::makeMainStorageDataFrame(cols).first; - fillDataFrame(5, 0, 2, {{1.0}, {1.0}, {1.0}, {1.0}, {1.0}}, - {0.0, 0.0, 0.0, 0.0, 0.0}, [](const TRowRef&) { return 1.0; }, *frame); + fillDataFrame(5, 0, 2, {{1.0, 1.0, 1.0, 1.0, 1.0}}, {0.0, 0.0, 0.0, 0.0, 0.0}, + [](const TRowRef&) { return 1.0; }, *frame); BOOST_REQUIRE_NO_THROW(maths::analytics::CBoostedTreeFactory::constructFromParameters( 1, std::make_unique()) diff --git a/lib/maths/common/unittest/CKMostCorrelatedTest.cc b/lib/maths/common/unittest/CKMostCorrelatedTest.cc index 1cc23fbf90..5b8fd373d6 100644 --- a/lib/maths/common/unittest/CKMostCorrelatedTest.cc +++ b/lib/maths/common/unittest/CKMostCorrelatedTest.cc @@ -764,7 +764,7 @@ BOOST_AUTO_TEST_CASE(testScale) { double sdRatio = std::sqrt(maths::common::CBasicStatistics::variance(slope)) / maths::common::CBasicStatistics::mean(slope); LOG_DEBUG(<< "sdRatio = " << sdRatio); - BOOST_TEST_REQUIRE(exponent < 2.0); + BOOST_TEST(exponent <= 2.0, boost::test_tools::tolerance(0.1)); BOOST_TEST_REQUIRE(sdRatio < 0.75); } diff --git a/lib/maths/time_series/CTrendComponent.cc b/lib/maths/time_series/CTrendComponent.cc index 78f9a27216..caaaa288bb 100644 --- a/lib/maths/time_series/CTrendComponent.cc +++ b/lib/maths/time_series/CTrendComponent.cc @@ -310,10 +310,15 @@ void CTrendComponent::shiftLevel(double shift, double magnitude{shifts[last] - shifts[next - 1]}; if (m_TimeOfLastLevelChange != UNSET_TIME) { double dt{static_cast(time - m_TimeOfLastLevelChange)}; - double value{static_cast( - common::CBasicStatistics::mean(values[segments[next] - 1]))}; - m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, - {{dt}, {value}}); + if (values.size() > segments[next] - 1) { + double value{static_cast( + common::CBasicStatistics::mean(values[segments[next] - 1]))}; + m_ProbabilityOfLevelChangeModel.addTrainingDataPoint(LEVEL_CHANGE_LABEL, + {{dt}, {value}}); + } else { + LOG_DEBUG(<< "Size mis-match reading from values. 
Length = " + << values.size() << ", requested index = " << segments[next] - 1); + } } m_TimeOfLastLevelChange = time; for (std::size_t i = segments[last]; i < values.size(); ++i, time += bucketLength) { diff --git a/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc b/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc index 4609953e33..e6a791293b 100644 --- a/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc +++ b/lib/maths/time_series/unittest/CCalendarCyclicTestTest.cc @@ -528,7 +528,7 @@ BOOST_AUTO_TEST_CASE(testLongBuckets) { TDoubleVec error; for (core_t::TTime time = 0, i = 0; time <= end; time += DAY) { rng.generateNormalSamples(0.0, 9.0, 1, error); - if (time >= months[i] && time < months[i] + DAY) { + if (time >= months[i] && time < months[i] + DAY && i < months.size() - 1) { error[0] += 20.0; ++i; } diff --git a/lib/model/CAnomalyDetectorModel.cc b/lib/model/CAnomalyDetectorModel.cc index c39339dc6b..90f6f95de9 100644 --- a/lib/model/CAnomalyDetectorModel.cc +++ b/lib/model/CAnomalyDetectorModel.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,7 @@ const std::string EMPTY; const model_t::CResultType SKIP_SAMPLING_RESULT_TYPE; const double SKIP_SAMPLING_WEIGHT{0.005}; +const core_t::TTime APPLIED_DETECTION_RULE_EXPIRATION{31536000}; // 1 year const CAnomalyDetectorModel::TStr1Vec EMPTY_STRING_LIST; @@ -325,6 +327,7 @@ std::uint64_t CAnomalyDetectorModel::checksum(bool /*includeCurrentBucketStats*/ hash = maths::common::CChecksum::calculate(hash, m_PersonBucketCounts[pid]); } } + seed = maths::common::CChecksum::calculate(seed, m_AppliedRuleChecksums); LOG_TRACE(<< "seed = " << seed); LOG_TRACE(<< "checksums = " << hashes); return maths::common::CChecksum::calculate(seed, hashes); @@ -336,6 +339,7 @@ void CAnomalyDetectorModel::debugMemoryUsage(const core::CMemoryUsage::TMemoryUs core::memory_debug::dynamicSize("m_Params", m_Params, mem); core::memory_debug::dynamicSize("m_PersonBucketCounts", m_PersonBucketCounts, mem); core::memory_debug::dynamicSize("m_InfluenceCalculators", m_InfluenceCalculators, mem); + core::memory_debug::dynamicSize("m_AppliedRuleChecksums", m_AppliedRuleChecksums, mem); } std::size_t CAnomalyDetectorModel::memoryUsage() const { @@ -343,6 +347,7 @@ std::size_t CAnomalyDetectorModel::memoryUsage() const { mem += core::memory::dynamicSize(m_DataGatherer); mem += core::memory::dynamicSize(m_PersonBucketCounts); mem += core::memory::dynamicSize(m_InfluenceCalculators); + mem += core::memory::dynamicSize(m_AppliedRuleChecksums); return mem; } @@ -657,5 +662,35 @@ void CAnomalyDetectorModel::CTimeSeriesCorrelateModelAllocator::prototypePrior( bool CMemoryCircuitBreaker::areAllocationsAllowed() const { return m_ResourceMonitor->areAllocationsAllowed(); } + +CAnomalyDetectorModel::TUint64TTimePrVec& CAnomalyDetectorModel::appliedRuleChecksums() { + return m_AppliedRuleChecksums; +} + +const CAnomalyDetectorModel::TUint64TTimePrVec& +CAnomalyDetectorModel::appliedRuleChecksums() const { + return m_AppliedRuleChecksums; +} + +bool CAnomalyDetectorModel::checkRuleApplied(const CDetectionRule& rule) const { + auto checksum = rule.checksum(); + return std::find_if(m_AppliedRuleChecksums.begin(), + m_AppliedRuleChecksums.end(), [checksum](const auto& pair) { + return pair.first == checksum; + }) != m_AppliedRuleChecksums.end(); +} + +void CAnomalyDetectorModel::markRuleApplied(const CDetectionRule& rule) { + auto currentTime = core::CTimeUtils::now(); + m_AppliedRuleChecksums.emplace_back(rule.checksum(), 
currentTime); + + // Remove all rules that are older than the expiration time + m_AppliedRuleChecksums.erase( + std::remove_if(m_AppliedRuleChecksums.begin(), m_AppliedRuleChecksums.end(), + [currentTime](const auto& pair) { + return currentTime - pair.second > APPLIED_DETECTION_RULE_EXPIRATION; + }), + m_AppliedRuleChecksums.end()); +} } } diff --git a/lib/model/CBucketGatherer.cc b/lib/model/CBucketGatherer.cc index a4f85e23c4..7a987ec6ae 100644 --- a/lib/model/CBucketGatherer.cc +++ b/lib/model/CBucketGatherer.cc @@ -24,6 +24,8 @@ #include +#include + #include #include diff --git a/lib/model/CCountingModel.cc b/lib/model/CCountingModel.cc index 06fb6a34ea..dc938e9f06 100644 --- a/lib/model/CCountingModel.cc +++ b/lib/model/CCountingModel.cc @@ -36,6 +36,7 @@ namespace { const std::string WINDOW_BUCKET_COUNT_TAG("a"); const std::string PERSON_BUCKET_COUNT_TAG("b"); const std::string MEAN_COUNT_TAG("c"); +const std::string APPLIED_DETECTION_RULE_CHECKSUMS_TAG("d"); // Extra data tag deprecated at model version 34 // TODO remove on next version bump //const std::string EXTRA_DATA_TAG("d"); @@ -79,6 +80,8 @@ void CCountingModel::acceptPersistInserter(core::CStatePersistInserter& inserter core::CPersistUtils::persist(PERSON_BUCKET_COUNT_TAG, this->personBucketCounts(), inserter); core::CPersistUtils::persist(MEAN_COUNT_TAG, m_MeanCounts, inserter); + core::CPersistUtils::persist(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + this->appliedRuleChecksums(), inserter); } bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -102,6 +105,12 @@ bool CCountingModel::acceptRestoreTraverser(core::CStateRestoreTraverser& traver LOG_ERROR(<< "Invalid mean counts"); return false; } + } else if (name == APPLIED_DETECTION_RULE_CHECKSUMS_TAG) { + if (core::CPersistUtils::restore(name, this->appliedRuleChecksums(), + traverser) == false) { + LOG_ERROR(<< "Invalid applied detection rule checksums"); + return false; + } } } while (traverser.next()); diff --git a/lib/model/CDetectionRule.cc b/lib/model/CDetectionRule.cc index 0ad0c21d76..0a4fa6de6c 100644 --- a/lib/model/CDetectionRule.cc +++ b/lib/model/CDetectionRule.cc @@ -8,12 +8,16 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ +#include #include #include +#include + #include -#include + +#include namespace ml { namespace model { @@ -38,6 +42,10 @@ void CDetectionRule::addCondition(const CRuleCondition& condition) { m_Conditions.push_back(condition); } +void CDetectionRule::clearConditions() { + m_Conditions.clear(); +} + void CDetectionRule::setCallback(TCallback cb) { m_Callback = std::move(cb); } @@ -74,22 +82,25 @@ void CDetectionRule::executeCallback(CAnomalyDetectorModel& model, core_t::TTime return; } } + if (model.checkRuleApplied(*this)) { + return; + } m_Callback(model, time); + + // Time shift rules should be applied only once + if (m_Action & E_TimeShift) { + model.markRuleApplied(*this); + } } } void CDetectionRule::addTimeShift(core_t::TTime timeShift) { - using TAnomalyDetectorPtrVec = core::CSmallVector; - this->setCallback([ - timeShift, timeShiftApplied = TAnomalyDetectorPtrVec() - ](CAnomalyDetectorModel & model, core_t::TTime time) mutable { - if (std::find(timeShiftApplied.begin(), timeShiftApplied.end(), &model) == - timeShiftApplied.end()) { - // When the callback is executed, the model is already in the correct time - // interval. Hence, we need to shift the time right away. 
- model.shiftTime(time, timeShift); - timeShiftApplied.emplace_back(&model); - } + m_Action |= E_TimeShift; + m_TimeShift = timeShift; + this->setCallback([timeShift](CAnomalyDetectorModel& model, core_t::TTime time) { + // When the callback is executed, the model is already in the correct time + // interval. Hence, we need to shift the time right away. + model.shiftTime(time, timeShift); }); } @@ -129,5 +140,22 @@ std::string CDetectionRule::printAction() const { } return result; } + +std::uint64_t CDetectionRule::checksum() const { + std::uint64_t result = maths::common::CChecksum::calculate(0, m_Action); + result = maths::common::CChecksum::calculate(result, m_Scope); + result = maths::common::CChecksum::calculate(result, m_Conditions); + + // Hash callback parameters if applicable + if (m_Action & E_TimeShift) { + // Hash m_TimeShift + result = maths::common::CChecksum::calculate(result, m_TimeShift); + } + + // IMPLEMENTATION NOTE: If there are other parameters associated with the callback, + // they should be included in the checksum. + + return result; +} } } diff --git a/lib/model/CForecastModelPersist.cc b/lib/model/CForecastModelPersist.cc index 0f59a09a50..51d353c6ff 100644 --- a/lib/model/CForecastModelPersist.cc +++ b/lib/model/CForecastModelPersist.cc @@ -128,11 +128,7 @@ bool CForecastModelPersist::CRestore::nextModel(TMathsModelPtr& model, m_ModelParams.s_MaximumTimeToTestForChange}; maths::common::SModelRestoreParams params{ - modelParams, - maths::common::STimeSeriesDecompositionRestoreParams{ - m_ModelParams.s_DecayRate, m_ModelParams.s_BucketLength, - m_ModelParams.s_ComponentSize, - m_ModelParams.distributionRestoreParams(dataType)}, + modelParams, m_ModelParams.decompositionRestoreParams(dataType), m_ModelParams.distributionRestoreParams(dataType)}; auto serialiserOperator = diff --git a/lib/model/CHierarchicalResultsAggregator.cc b/lib/model/CHierarchicalResultsAggregator.cc index 383699dde3..34eb2a67bd 100644 --- a/lib/model/CHierarchicalResultsAggregator.cc +++ b/lib/model/CHierarchicalResultsAggregator.cc @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/lib/model/CHierarchicalResultsNormalizer.cc b/lib/model/CHierarchicalResultsNormalizer.cc index ad0e249f87..3f83b6f81c 100644 --- a/lib/model/CHierarchicalResultsNormalizer.cc +++ b/lib/model/CHierarchicalResultsNormalizer.cc @@ -22,7 +22,6 @@ #include #include -#include #include namespace ml { @@ -38,7 +37,6 @@ const std::string INFLUENCER_CUE_PREFIX("infl"); const std::string PARTITION_CUE_PREFIX("part"); const std::string PERSON_CUE_PREFIX("per"); const std::string LEAF_CUE_PREFIX("leaf"); -const std::string EMPTY_STRING; } namespace hierarchical_results_normalizer_detail { @@ -59,12 +57,47 @@ std::uint64_t SNormalizer::checksum() const { std::uint64_t seed = maths::common::CChecksum::calculate(0, s_Description); return maths::common::CChecksum::calculate(seed, s_Normalizer); } + +void SNormalizer::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName("SNormalizer Memory Usage"); + core::memory_debug::dynamicSize("s_Description", s_Description, mem); + core::memory_debug::dynamicSize("s_Normalizer", s_Normalizer, mem); +} + +std::size_t SNormalizer::memoryUsage() const { + std::size_t mem = 0; + mem += core::memory::dynamicSize(s_Description); + mem += core::memory::dynamicSize(s_Normalizer); + return mem; +} +} + +void CHierarchicalResultsNormalizer::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const { + mem->setName(" 
Hierarchical Results Normalizer Memory Usage"); + this->CHierarchicalResultsLevelSet::debugMemoryUsage(mem->addChild()); } -CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(const CAnomalyDetectorModelConfig& modelConfig) +std::size_t CHierarchicalResultsNormalizer::memoryUsage() const { + return this->CHierarchicalResultsLevelSet::memoryUsage(); +} +std::size_t CHierarchicalResultsNormalizer::staticSize() const { + return sizeof(*this); +} + +void CHierarchicalResultsNormalizer::updateModelSizeStats( + CResourceMonitor::SModelSizeStats& /*modelSizeStats*/) const { + // do nothing +} + +CHierarchicalResultsNormalizer::CHierarchicalResultsNormalizer(CLimits& limits, + const CAnomalyDetectorModelConfig& modelConfig) : TBase(TNormalizer(std::string(), std::make_shared(modelConfig))), - m_Job(E_NoOp), m_ModelConfig(modelConfig), m_HasLastUpdateCausedBigChange(false) { + m_Limits(limits), m_ModelConfig(modelConfig) { + limits.resourceMonitor().registerComponent(*this); +} +CHierarchicalResultsNormalizer::~CHierarchicalResultsNormalizer() { + m_Limits.resourceMonitor().unRegisterComponent(*this); // NOSONAR } void CHierarchicalResultsNormalizer::setJob(EJob job) { diff --git a/lib/model/CIndividualModel.cc b/lib/model/CIndividualModel.cc index 33d650402e..e5b8fbafc3 100644 --- a/lib/model/CIndividualModel.cc +++ b/lib/model/CIndividualModel.cc @@ -67,6 +67,7 @@ const std::string FEATURE_CORRELATE_MODELS_TAG("f"); //const std::string INTERIM_BUCKET_CORRECTOR_TAG("h"); const std::string MEMORY_ESTIMATOR_TAG("i"); const std::string UPGRADING_PRE_7_5_STATE("j"); +const std::string APPLIED_DETECTION_RULE_CHECKSUMS_TAG("k"); } CIndividualModel::CIndividualModel(const SModelParams& params, @@ -357,6 +358,8 @@ void CIndividualModel::doAcceptPersistInserter(core::CStatePersistInserter& inse } core::CPersistUtils::persist(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, inserter); inserter.insertValue(UPGRADING_PRE_7_5_STATE, false); + core::CPersistUtils::persist(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + this->appliedRuleChecksums(), inserter); } bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -387,6 +390,8 @@ bool CIndividualModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr RESTORE(MEMORY_ESTIMATOR_TAG, core::CPersistUtils::restore(MEMORY_ESTIMATOR_TAG, m_MemoryEstimator, traverser)) RESTORE_BUILT_IN(UPGRADING_PRE_7_5_STATE, upgradingPre7p5State) + RESTORE(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + core::CPersistUtils::restore(name, this->appliedRuleChecksums(), traverser)); } while (traverser.next()); if (traverser.haveBadState()) { diff --git a/lib/model/CPopulationModel.cc b/lib/model/CPopulationModel.cc index a803815b4e..28d6358359 100644 --- a/lib/model/CPopulationModel.cc +++ b/lib/model/CPopulationModel.cc @@ -91,6 +91,8 @@ const std::string ATTRIBUTE_FIRST_BUCKET_TIME_TAG("d"); const std::string ATTRIBUTE_LAST_BUCKET_TIME_TAG("e"); const std::string PERSON_ATTRIBUTE_BUCKET_COUNT_TAG("f"); const std::string DISTINCT_PERSON_COUNT_TAG("g"); +const std::string APPLIED_DETECTION_RULE_CHECKSUMS_TAG("h"); + // Extra data tag deprecated at model version 34 // TODO remove on next version bump //const std::string EXTRA_DATA_TAG("h"); @@ -294,6 +296,8 @@ void CPopulationModel::doAcceptPersistInserter(core::CStatePersistInserter& inse std::bind(&maths::common::CBjkstUniqueValues::acceptPersistInserter, &m_DistinctPersonCounts[cid], std::placeholders::_1)); } + core::CPersistUtils::persist(APPLIED_DETECTION_RULE_CHECKSUMS_TAG, + 
this->appliedRuleChecksums(), inserter); } bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { @@ -316,12 +320,17 @@ bool CPopulationModel::doAcceptRestoreTraverser(core::CStateRestoreTraverser& tr maths::time_series::CCountMinSketch(0, 0)); m_PersonAttributeBucketCounts.back().swap(sketch); continue; - } - if (name == DISTINCT_PERSON_COUNT_TAG) { + } else if (name == DISTINCT_PERSON_COUNT_TAG) { maths::common::CBjkstUniqueValues sketch(traverser); m_DistinctPersonCounts.push_back(maths::common::CBjkstUniqueValues(0, 0)); m_DistinctPersonCounts.back().swap(sketch); continue; + } else if (name == APPLIED_DETECTION_RULE_CHECKSUMS_TAG) { + if (core::CPersistUtils::restore(name, this->appliedRuleChecksums(), + traverser) == false) { + LOG_ERROR(<< "Invalid applied detection rule checksums"); + return false; + } } } while (traverser.next()); diff --git a/lib/model/CRuleCondition.cc b/lib/model/CRuleCondition.cc index 6f6ba6c475..8b9a35fb0e 100644 --- a/lib/model/CRuleCondition.cc +++ b/lib/model/CRuleCondition.cc @@ -12,6 +12,8 @@ #include #include +#include + #include #include @@ -164,5 +166,12 @@ std::string CRuleCondition::print(ERuleConditionOperator op) const { } return std::string(); } + +std::uint64_t CRuleCondition::checksum() const { + std::uint64_t result{maths::common::CChecksum::calculate(0, m_AppliesTo)}; + result = maths::common::CChecksum::calculate(result, m_Operator); + result = maths::common::CChecksum::calculate(result, m_Value); + return result; +} } } diff --git a/lib/model/CRuleScope.cc b/lib/model/CRuleScope.cc index 2cd541c5c7..e239491d7f 100644 --- a/lib/model/CRuleScope.cc +++ b/lib/model/CRuleScope.cc @@ -13,6 +13,8 @@ #include +#include + #include #include @@ -70,5 +72,15 @@ std::string CRuleScope::print() const { } return result; } + +std::uint64_t CRuleScope::checksum() const { + std::uint64_t result{0}; + for (const auto& triple : m_Scope) { + result = maths::common::CChecksum::calculate(result, triple.first); + result = maths::common::CChecksum::calculate(result, triple.second.get()); + result = maths::common::CChecksum::calculate(result, triple.third); + } + return result; +} } } diff --git a/lib/model/unittest/CCountingModelTest.cc b/lib/model/unittest/CCountingModelTest.cc index e4a3a5de65..facdbde9c1 100644 --- a/lib/model/unittest/CCountingModelTest.cc +++ b/lib/model/unittest/CCountingModelTest.cc @@ -10,6 +10,7 @@ */ #include +#include #include #include @@ -23,6 +24,7 @@ #include #include "CModelTestFixtureBase.h" +#include "core/CJsonStateRestoreTraverser.h" #include @@ -263,4 +265,44 @@ BOOST_FIXTURE_TEST_CASE(testInterimBucketCorrector, CTestFixture) { } } +BOOST_FIXTURE_TEST_CASE(testAppliedRuleChecksumsPersistRestore, CTestFixture) { + // Check that applied rule checksums are persisted and restored correctly. 
+
+    core_t::TTime time{200};
+    core_t::TTime bucketLength{600};
+
+    SModelParams params(bucketLength);
+    params.s_DecayRate = 0.001;
+
+    this->makeModel(params, {model_t::E_IndividualCountByBucketAndPerson}, time);
+    CCountingModel* model = dynamic_cast<CCountingModel*>(m_Model.get());
+    BOOST_TEST_REQUIRE(model);
+
+    // Create a time shift detection rule and apply it
+    CRuleCondition conditionGte;
+    conditionGte.appliesTo(CRuleCondition::E_Time);
+    conditionGte.op(CRuleCondition::E_GTE);
+    conditionGte.value(100.0);
+
+    CDetectionRule rule;
+    rule.addCondition(conditionGte);
+    rule.addTimeShift(100);
+    rule.executeCallback(*model, time);
+
+    // Persist the model with CCountingModel::acceptPersistInserter
+    std::ostringstream persistStream;
+    core::CJsonStatePersistInserter inserter(persistStream);
+    model->acceptPersistInserter(inserter);
+    std::string persist = persistStream.str();
+
+    // Restore the model with CCountingModel::acceptRestoreTraverser
+    std::istringstream restoreStream(persist);
+    core::CJsonStateRestoreTraverser traverser(restoreStream);
+    auto restoredModel = std::make_shared<CCountingModel>(
+        params, m_Gatherer, m_InterimBucketCorrector, traverser);
+
+    // Check that for the restored model the rule is marked as applied
+    BOOST_REQUIRE(restoredModel->checkRuleApplied(rule) == true);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/lib/model/unittest/CDetectionRuleTest.cc b/lib/model/unittest/CDetectionRuleTest.cc
index 09c408790e..271ce6849e 100644
--- a/lib/model/unittest/CDetectionRuleTest.cc
+++ b/lib/model/unittest/CDetectionRuleTest.cc
@@ -1095,4 +1095,99 @@ BOOST_FIXTURE_TEST_CASE(testTwoTimeShiftRuleShouldShiftTwice, CTestFixture) {
     BOOST_TEST_REQUIRE(trendModel.timeShift() == timeShift1 + timeShift2);
 }

+BOOST_FIXTURE_TEST_CASE(testChecksum, CTestFixture) {
+    // Create two identical rules
+    CDetectionRule rule1;
+    CDetectionRule rule2;
+
+    // Compute checksums
+    std::uint64_t checksum1 = rule1.checksum();
+    std::uint64_t checksum2 = rule2.checksum();
+
+    // Verify that identical rules have the same checksum
+    BOOST_REQUIRE_EQUAL(checksum1, checksum2);
+
+    // Test actions
+    // Modify the action of rule2
+    rule2.action(CDetectionRule::E_SkipModelUpdate);
+
+    // Verify that different actions result in different checksums
+    checksum1 = rule1.checksum();
+    checksum2 = rule2.checksum();
+    BOOST_REQUIRE_NE(checksum1, checksum2);
+
+    // Test conditions
+    // Reset rule2 to be identical to rule1
+    rule2 = rule1;
+
+    // Add a condition to rule2
+    CRuleCondition condition;
+    condition.appliesTo(CRuleCondition::E_Actual);
+    condition.op(CRuleCondition::E_GT);
+    condition.value(100.0);
+    rule2.addCondition(condition);
+
+    // Verify that adding a condition changes the checksum
+    checksum1 = rule1.checksum();
+    checksum2 = rule2.checksum();
+    BOOST_REQUIRE_NE(checksum1, checksum2);
+
+    // Add the same condition to rule1
+    rule1.addCondition(condition);
+
+    // Verify that identical conditions result in the same checksum
+    checksum1 = rule1.checksum();
+    checksum2 = rule2.checksum();
+    BOOST_REQUIRE_EQUAL(checksum1, checksum2);
+
+    // Modify the condition in rule2
+    condition.value(200.0);
+    rule2.clearConditions();
+    rule2.addCondition(condition);
+
+    // Verify that different condition values result in different checksums
+    checksum1 = rule1.checksum();
+    checksum2 = rule2.checksum();
+    BOOST_REQUIRE_NE(checksum1, checksum2);
+
+    // Test Scope
+    rule2 = rule1;
+
+    // Modify the scope of rule2
+    std::string fieldName = "user";
+    core::CPatternSet valueFilter;
+    valueFilter.initFromPatternList({"admin"});
rule2.includeScope(fieldName, valueFilter); + + // Verify that different scopes result in different checksums + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Add the same scope to rule1 + rule1.includeScope(fieldName, valueFilter); + + // Verify that identical scopes result in the same checksum + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_EQUAL(checksum1, checksum2); + + // Test Time Shift + // Modify the time shift in rule2 + rule2.addTimeShift(3600); + + // Verify that different time shifts result in different checksums + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_NE(checksum1, checksum2); + + // Add the same time shift to rule1 + rule1.addTimeShift(3600); + + // Verify that identical time shifts result in the same checksum + checksum1 = rule1.checksum(); + checksum2 = rule2.checksum(); + BOOST_REQUIRE_EQUAL(checksum1, checksum2); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CForecastModelPersistTest.cc b/lib/model/unittest/CForecastModelPersistTest.cc index b932b1423f..d0aa03af22 100644 --- a/lib/model/unittest/CForecastModelPersistTest.cc +++ b/lib/model/unittest/CForecastModelPersistTest.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -39,8 +40,9 @@ BOOST_AUTO_TEST_CASE(testPersistAndRestore) { params.s_DecayRate = 0.001; params.s_LearnRate = 1.0; params.s_MinimumTimeToDetectChange = 6 * core::constants::HOUR; - params.s_MaximumTimeToTestForChange = core::constants::DAY; - maths::time_series::CTimeSeriesDecomposition trend(params.s_DecayRate, bucketLength); + double trendDecayRate{CAnomalyDetectorModelConfig::trendDecayRate( + params.s_DecayRate, bucketLength)}; + maths::time_series::CTimeSeriesDecomposition trend(trendDecayRate, bucketLength); maths::common::CNormalMeanPrecConjugate prior{ maths::common::CNormalMeanPrecConjugate::nonInformativePrior( diff --git a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc index b16640f2b6..b26b727185 100644 --- a/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc +++ b/lib/model/unittest/CHierarchicalResultsLevelSetTest.cc @@ -10,6 +10,7 @@ */ #include +#include #include #include @@ -26,6 +27,8 @@ struct STestNode { STestNode(const std::string& name) : s_Name(name) {} std::string print() const { return s_Name; } std::string s_Name; + + std::size_t memoryUsage() const { return sizeof(s_Name); } }; class CConcreteHierarchicalResultsLevelSet @@ -131,4 +134,29 @@ BOOST_AUTO_TEST_CASE(testElements) { } } +BOOST_AUTO_TEST_CASE(testMemoryUsage) { + CConcreteHierarchicalResultsLevelSet levelSet(STestNode("root")); + std::size_t memoryUsage = levelSet.memoryUsage(); + BOOST_REQUIRE(memoryUsage > 0); + + auto addAndCheckMemoryUsage = [&memoryUsage, &levelSet](auto& container, + const std::string& name) { + container.emplace_back(ml::core::CCompressedDictionary<1>::CWord(), + STestNode(name)); + std::size_t newMemoryUsage = levelSet.memoryUsage(); + BOOST_REQUIRE(newMemoryUsage > memoryUsage); + memoryUsage = newMemoryUsage; + }; + + addAndCheckMemoryUsage(levelSet.m_InfluencerBucketSet, "influencer bucket 1"); + addAndCheckMemoryUsage(levelSet.m_InfluencerSet, "influencer 1"); + addAndCheckMemoryUsage(levelSet.m_PartitionSet, "partition 1"); + addAndCheckMemoryUsage(levelSet.m_PersonSet, "person 1"); + addAndCheckMemoryUsage(levelSet.m_LeafSet, "leaf 1"); + + auto debugMemoryUsage = std::make_shared(); + 
levelSet.debugMemoryUsage(debugMemoryUsage); + BOOST_REQUIRE(debugMemoryUsage->usage() == memoryUsage); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/model/unittest/CHierarchicalResultsTest.cc b/lib/model/unittest/CHierarchicalResultsTest.cc index 91b4c57f9e..df332506c3 100644 --- a/lib/model/unittest/CHierarchicalResultsTest.cc +++ b/lib/model/unittest/CHierarchicalResultsTest.cc @@ -1533,7 +1533,8 @@ BOOST_AUTO_TEST_CASE(testNormalizer) { model::CAnomalyDetectorModelConfig::defaultConfig(); model::CHierarchicalResultsAggregator aggregator(modelConfig); model::CHierarchicalResultsProbabilityFinalizer finalizer; - model::CHierarchicalResultsNormalizer normalizer(modelConfig); + model::CLimits l; + model::CHierarchicalResultsNormalizer normalizer(l, modelConfig); static const std::string FUNC("max"); static const ml::model::function_t::EFunction function(ml::model::function_t::E_IndividualMetricMax); @@ -1731,7 +1732,8 @@ BOOST_AUTO_TEST_CASE(testNormalizer) { LOG_DEBUG(<< "Compressed JSON doc is:\n" << origJson); { - model::CHierarchicalResultsNormalizer newNormalizerJson(modelConfig); + model::CLimits limits; + model::CHierarchicalResultsNormalizer newNormalizerJson(limits, modelConfig); std::stringstream stream(origJson); BOOST_REQUIRE_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok, newNormalizerJson.fromJsonStream(stream)); @@ -1761,7 +1763,8 @@ BOOST_AUTO_TEST_CASE(testNormalizer) { } while (filteredInput); LOG_DEBUG(<< "Uncompressed JSON doc is:\n" << uncompressedJson); - model::CHierarchicalResultsNormalizer newNormalizerJson(modelConfig); + model::CLimits limits; + model::CHierarchicalResultsNormalizer newNormalizerJson(limits, modelConfig); std::stringstream stream(uncompressedJson); BOOST_REQUIRE_EQUAL(model::CHierarchicalResultsNormalizer::E_Ok, newNormalizerJson.fromJsonStream(stream)); diff --git a/lib/model/unittest/CMakeLists.txt b/lib/model/unittest/CMakeLists.txt index 47a28b792a..8e6d6dcf48 100644 --- a/lib/model/unittest/CMakeLists.txt +++ b/lib/model/unittest/CMakeLists.txt @@ -30,8 +30,8 @@ set (SRCS CForecastModelPersistTest.cc CFunctionTypesTest.cc CGathererToolsTest.cc - CHierarchicalResultsTest.cc CHierarchicalResultsLevelSetTest.cc + CHierarchicalResultsTest.cc CInterimBucketCorrectorTest.cc CLimitsTest.cc CLocalCategoryIdTest.cc
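
Review note on the mechanism running through this change: CDetectionRule::checksum() folds in the action, scope, conditions and, for time shift rules, m_TimeShift; each model then records (checksum, time) pairs via markRuleApplied()/checkRuleApplied() so that one-shot actions fire exactly once, with entries pruned once older than APPLIED_DETECTION_RULE_EXPIRATION (one year). The standalone sketch below restates that bookkeeping using only standard library types; the names AppliedRuleLog, contains() and mark() are illustrative inventions for this note, not the ml-cpp API.

#include <algorithm>
#include <cstdint>
#include <ctime>
#include <utility>
#include <vector>

namespace sketch {

constexpr std::time_t APPLIED_RULE_EXPIRATION{31536000}; // 1 year, mirroring the patch

class AppliedRuleLog {
public:
    // True if a rule with this checksum has been applied and has not yet expired.
    bool contains(std::uint64_t checksum) const {
        return std::find_if(m_Applied.begin(), m_Applied.end(),
                            [checksum](const auto& entry) {
                                return entry.first == checksum;
                            }) != m_Applied.end();
    }

    // Record an application, then prune entries older than the expiration
    // window with the same erase/remove_if idiom used in markRuleApplied().
    void mark(std::uint64_t checksum, std::time_t now) {
        m_Applied.emplace_back(checksum, now);
        m_Applied.erase(std::remove_if(m_Applied.begin(), m_Applied.end(),
                                       [now](const auto& entry) {
                                           return now - entry.second > APPLIED_RULE_EXPIRATION;
                                       }),
                        m_Applied.end());
    }

private:
    // (checksum, time of application) pairs, analogous to TUint64TTimePrVec.
    std::vector<std::pair<std::uint64_t, std::time_t>> m_Applied;
};

} // namespace sketch

// Usage: a one-shot callback runs only if its rule's checksum is not yet logged.
int main() {
    sketch::AppliedRuleLog log;
    std::uint64_t ruleChecksum{0x1234};
    std::time_t now{std::time(nullptr)};
    if (!log.contains(ruleChecksum)) {
        // ... execute the rule's callback, e.g. shift the model's time base ...
        log.mark(ruleChecksum, now);
    }
    return log.contains(ruleChecksum) ? 0 : 1;
}

Because this log is model state, it must survive persistence, which is why the patch adds persist/restore tags for the applied checksums ("d" in CCountingModel, "k" in CIndividualModel, "h" in CPopulationModel) and why testAppliedRuleChecksumsPersistRestore round-trips a model and asserts the rule is still marked as applied afterwards.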