diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 904920e3db..7ca5a32337 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,9 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures, ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, - ml::counter_t::E_TSADOutputMemoryAllocatorUsage}; + ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADSystemMemoryUsage, + ml::counter_t::E_TSADMaxSystemMemoryUsage}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -151,6 +154,8 @@ int main(int argc, char** argv) { } cancellerThread.stop(); + LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 3b119797c1..aeb1f7748c 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846].) === Bug Fixes diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 3c4d10269f..4fde6670ff 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,6 +112,12 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The resident set size of the process, in bytes. + E_TSADSystemMemoryUsage = 31, + + //!
The maximum resident set size of the process, in bytes. + E_TSADMaxSystemMemoryUsage = 32, + // Data Frame Outlier Detection //! The estimated peak memory usage for outlier detection in bytes @@ -146,7 +152,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 31 + E_LastEnumCounter = 33 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -355,6 +361,10 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADSystemMemoryUsage, "E_TSADSystemMemoryUsage", + "The amount of system memory used by the process, in bytes"}, + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxSystemMemoryUsage", + "The maximum amount of system memory used by the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CProcessMemoryUsage.h b/include/model/CProcessMemoryUsage.h new file mode 100644 index 0000000000..8ea1a2057c --- /dev/null +++ b/include/model/CProcessMemoryUsage.h @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. 
You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_model_CProcessMemoryUsage_h +#define INCLUDED_ml_model_CProcessMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines how to calculate the memory used by the current process. +//! +//! DESCRIPTION:\n +//! Determines how to calculate the memory used by the current process based on the operating system. +//! On some platforms (macOS, Windows) we use the estimated memory usage of the models, +//! while on others (Linux) we use the actual memory of the process as provided by system calls. +class MODEL_EXPORT CProcessMemoryUsage { +public: + enum class EMemoryStrategy { E_Estimated, E_System }; + + static const EMemoryStrategy MEMORY_STRATEGY; + +public: + CProcessMemoryUsage() = delete; +}; +} +} + +#endif //INCLUDED_ml_model_CProcessMemoryUsage_h diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 5c7583888b..411c3671e9 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -180,6 +180,12 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; + //! Returns the current physical memory of the process (rss) as reported by the system + static std::size_t systemMemory(); + + //! Returns the maximum physical memory of the process (max rss) as reported by the system + static std::size_t maxSystemMemory(); + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; @@ -229,6 +235,9 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the amount by which reported memory usage is scaled depending on the type of persistence in use std::size_t persistenceMemoryIncreaseFactor() const; + //! Returns \p usage unchanged under the estimated strategy, or the process's max resident set size under the system strategy. + std::size_t applyMemoryStrategy(std::size_t usage) const; + private: //!
The registered collection of components TMonitoredResourcePtrSizeUMap m_Resources; diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 3a8c06be2a..a334fe34dc 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,6 +205,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = + model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = + model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 43fef49602..68b5b80c88 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,6 +25,8 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; +const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"}; +const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index b003e90a53..e4cffde084 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -9,6 +9,7 @@ * limitation. 
*/ #include +#include #include #include @@ -105,8 +106,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; - //limits.resourceMonitor().m_ByteLimitHigh = 100000; - //limits.resourceMonitor().m_ByteLimitLow = 90000; { LOG_TRACE(<< "Setting up job"); @@ -129,6 +128,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { nonLimitedUsage = limits.resourceMonitor().totalMemory(); } } + LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ba44163e7c..16772158f8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1733,7 +1733,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_OverFields = 7; resourceUsage.s_AllocationFailures = 8; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; resourceUsage.s_BucketStartTime = 9; resourceUsage.s_BytesExceeded = 10; resourceUsage.s_BytesMemoryLimit = 11; @@ -1785,7 +1785,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("current_model_bytes", + BOOST_REQUIRE_EQUAL("peak_model_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 9aa1e969c9..2f1d02c2e2 100644 --- 
a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -8,9 +8,10 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include +#include + #include #include #include diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 7ca2d7e6c0..7c612270b3 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -8,8 +8,9 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include + +#include #include #include @@ -36,6 +37,7 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError()); return 0; } + return static_cast(stats.PeakWorkingSetSize); } } diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index 1abec95da0..8667aa1430 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -62,7 +62,7 @@ std::function makeReader(std::ostringstream& loggedData) { return; } } - BOOST_FAIL("Failed to connect to logging pipe within a reasonable time"); + BOOST_TEST_CHECK(false, "Failed to connect to logging pipe within a reasonable time"); }; } diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 39aef5e07e..6ad24c5829 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -167,6 +167,7 @@ BOOST_AUTO_TEST_CASE(testServerIsCWriter) { ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; BOOST_TEST_REQUIRE(file); + sleep(1); std::size_t charsLeft{TEST_SIZE}; std::size_t blockSize{7}; while (charsLeft > 0) { diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index c53eec9fb0..73dd74e4d0 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,6 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc 
CSimpleCountDetector.cc + CProcessMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CProcessMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc new file mode 100644 index 0000000000..176a9825d2 --- /dev/null +++ b/lib/model/CProcessMemoryUsage.cc @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +namespace ml { +namespace model { +// On platforms other than Linux the process memory usage is the estimated size of the models. +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{ + EMemoryStrategy::E_Estimated}; +} +} diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc new file mode 100644 index 0000000000..ff3a52151b --- /dev/null +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. 
+ */ + +#include + +namespace ml { +namespace model { + +// On Linux the process memory usage is determined by the OS. +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{ + EMemoryStrategy::E_System}; +} +} diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index d93b3b8bd8..83ec624b39 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -20,6 +21,7 @@ #include #include +#include #include #include @@ -382,7 +384,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; + res.s_BytesMemoryLimit = this->getBytesMemoryLimit(); res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; std::uint64_t assignmentMemoryBasis{ @@ -400,6 +402,22 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { return res; } +std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { + std::size_t modifiedUsage{usage}; // Fall back to the estimate if the strategy is unknown. + switch (CProcessMemoryUsage::MEMORY_STRATEGY) { + case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { + modifiedUsage = usage; + break; + } + case CProcessMemoryUsage::EMemoryStrategy::E_System: { + modifiedUsage = core::CProcessStats::maxResidentSetSize(); + break; + } + default: { LOG_WARN(<< "Unknown memory strategy"); } + } + return modifiedUsage; +} + std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { // We scale the reported memory usage by the inverse of the byte limit margin.
// This gives the user a fairer indication of how close the job is to hitting @@ -486,10 +504,17 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - return m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage)); + return this->applyMemoryStrategy(m_MonitoredResourceCurrentMemory + m_ExtraMemory + + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage))); +} + +std::size_t CResourceMonitor::systemMemory() { + return core::CProcessStats::residentSetSize(); } +std::size_t CResourceMonitor::maxSystemMemory() { + return core::CProcessStats::maxResidentSetSize(); +} } // model } // ml diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f69dccc384..b11c79aca5 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) { } BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { - // Clear the counter so that other test cases do not interfere. + // Clear the counters so that other test cases do not interfere. core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0; CLimits limits;