Skip to content

Commit 1c44f76

Browse files
authored
Merge pull request #899 from mickem/debugging_core_issue
Debugging core issue t3
2 parents d10d198 + 2108afa commit 1c44f76

File tree

7 files changed

+142
-28
lines changed

7 files changed

+142
-28
lines changed

docs/docs/reference/windows/CheckSystem.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ CPU Load ok|'total 5m'=16%;80;90 'total 1m'=13%;80;90 'total 5s'=13%;80;90
113113
<a name="check_cpu_show-default"/>
114114
<a name="check_cpu_help-short"/>
115115
<a name="check_cpu_time"/>
116+
<a name="check_cpu_cores"/>
116117
<a name="check_cpu_options"/>
117118
#### Command-line Arguments
118119

@@ -140,6 +141,7 @@ CPU Load ok|'total 5m'=16%;80;90 'total 1m'=13%;80;90 'total 5s'=13%;80;90
140141
| [detail-syntax](#check_cpu_detail-syntax) | ${time}: ${load}% | Detail level syntax. |
141142
| [perf-syntax](#check_cpu_perf-syntax) | ${core} ${time} | Performance alias syntax. |
142143
| time | | The time to check |
144+
| cores | N/A | Removes the default filter so that the individual cores are included. Do not combine this with a custom filter. |
143145

144146

145147

@@ -2273,6 +2275,7 @@ Section for system checks and system settings
22732275
|-----------------------------------------------|---------------|--------------------------|
22742276
| [default buffer length](#default-buffer-time) | 1h | Default buffer time |
22752277
| [disable](#disable-automatic-checks) | | Disable automatic checks |
2278+
| [fetch core loads](#fetch-core-load) | true | Fetch core load |
22762279
| [subsystem](#pdh-subsystem) | default | PDH subsystem |
22772280

22782281

@@ -2281,6 +2284,7 @@ Section for system checks and system settings
22812284
# Section for system checks and system settings
22822285
[/settings/system/windows]
22832286
default buffer length=1h
2287+
fetch core loads=true
22842288
subsystem=default
22852289

22862290
```
@@ -2343,6 +2347,33 @@ disable=
23432347

23442348

23452349

2350+
#### Fetch core load <a id="/settings/system/windows/fetch core loads"></a>
2351+
2352+
Set to false to use a different API for fetching CPU load (this will not provide per-core load, and the values will not exactly match those shown in Task Manager).
2353+
2354+
2355+
2356+
2357+
2358+
| Key | Description |
2359+
|----------------|-------------------------------------------------------|
2360+
| Path: | [/settings/system/windows](#/settings/system/windows) |
2361+
| Key: | fetch core loads |
2362+
| Advanced: | Yes (means it is not commonly used) |
2363+
| Default value: | `true` |
2364+
| Used by: | CheckSystem |
2365+
2366+
2367+
**Sample:**
2368+
2369+
```
2370+
[/settings/system/windows]
2371+
# Fetch core load
2372+
fetch core loads=true
2373+
```
2374+
2375+
2376+
23462377
#### PDH subsystem <a id="/settings/system/windows/subsystem"></a>
23472378

23482379
Set which pdh subsystem to use.

include/rrd_buffer.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,23 +63,23 @@ struct rrd_buffer {
6363
value_type ret;
6464
if (time < 0) return ret;
6565
if (time <= seconds.size()) {
66-
for (list_type::const_iterator cit = seconds.end() - time; cit != seconds.end(); ++cit) {
66+
for (typename list_type::const_iterator cit = seconds.end() - time; cit != seconds.end(); ++cit) {
6767
ret.add(*cit);
6868
}
6969
ret.normalize(time);
7070
return ret;
7171
}
7272
time /= 60;
7373
if (time <= minutes.size()) {
74-
for (list_type::const_iterator cit = minutes.end() - time; cit != minutes.end(); ++cit) {
74+
for (typename list_type::const_iterator cit = minutes.end() - time; cit != minutes.end(); ++cit) {
7575
ret.add(*cit);
7676
}
7777
ret.normalize(time);
7878
return ret;
7979
}
8080
time /= 60;
8181
if (time >= hours.size()) throw nsclient::nsclient_exception("Size larger than buffer");
82-
for (list_type::const_iterator cit = hours.end() - time; cit != hours.end(); ++cit) {
82+
for (typename list_type::const_iterator cit = hours.end() - time; cit != hours.end(); ++cit) {
8383
ret.add(*cit);
8484
}
8585
ret.normalize(time);

include/win_sysinfo/win_sysinfo.cpp

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
* along with NSClient++. If not, see <http://www.gnu.org/licenses/>.
1818
*/
1919

20-
#include <win/windows.hpp>
20+
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
21+
#include <windows.h>
2122

2223
#include <win_sysinfo/win_defines.hpp>
2324
#include <win_sysinfo/win_sysinfo.hpp>
@@ -225,7 +226,7 @@ bool g_hasVersion = false;
225226
bool g_hasBasicInfo = false;
226227

227228
boost::scoped_array<unsigned long long> g_CPUIdleTimeOld;
228-
boost::scoped_array<unsigned long long> g_CPUTotalTimeOld;
229+
boost::scoped_array<unsigned long long> g_CPUUserTimeOld;
229230
boost::scoped_array<unsigned long long> g_CPUKernelTimeOld;
230231

231232
void init_old_buffer(boost::scoped_array<unsigned long long> &array, const std::size_t size) {
@@ -341,16 +342,22 @@ hlp::buffer<BYTE, winapi::SYSTEM_PROCESS_INFORMATION *> system_info::get_system_
341342
throw nsclient::nsclient_exception("Failed to enumerate processes: unknown error");
342343
}
343344

344-
system_info::cpu_load system_info::get_cpu_load() {
345+
/**
 * Express `part` as a percentage of `total`.
 *
 * @param part  The portion being measured (e.g. idle or kernel time delta).
 * @param total The whole that `part` is measured against (e.g. total time delta).
 * @return `part * 100 / total` as a double, or 0.0 when `total` is zero.
 */
double get_rate(unsigned long long part, unsigned long long total) {
  // A zero total would otherwise yield NaN (0/0) or inf (x/0); report 0% instead
  // so callers never propagate a non-finite load value.
  if (total == 0) {
    return 0.0;
  }
  const auto part_d = static_cast<double>(part);
  const auto total_d = static_cast<double>(total);
  return (part_d * 100.0) / total_d;
}
350+
351+
system_info::cpu_load system_info::get_cpu_load_per_core() {
345352
int cores = get_numberOfProcessorscores();
346353
init_old_buffer(g_CPUIdleTimeOld, cores);
347-
init_old_buffer(g_CPUTotalTimeOld, cores);
354+
init_old_buffer(g_CPUUserTimeOld, cores);
348355
init_old_buffer(g_CPUKernelTimeOld, cores);
349356

350357
boost::scoped_array<winapi::SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION> buffer(new winapi::SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION[cores]);
351358
if (winapi::NtQuerySystemInformation(winapi::SystemProcessorPerformanceInformation, &buffer[0],
352359
sizeof(winapi::SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION) * cores, NULL) != 0) {
353-
throw nsclient::nsclient_exception("Whoops");
360+
throw nsclient::nsclient_exception("Failed to fetch cpu load");
354361
}
355362

356363
cpu_load result;
@@ -360,18 +367,20 @@ system_info::cpu_load system_info::get_cpu_load() {
360367

361368
for (int i = 0; i < cores; i++) {
362369
unsigned long long CPUIdleTime = buffer[i].IdleTime.QuadPart;
363-
unsigned long long CPUKernelTime = buffer[i].KernelTime.QuadPart - buffer[i].IdleTime.QuadPart;
364-
unsigned long long CPUTotalTime = buffer[i].KernelTime.QuadPart + buffer[i].UserTime.QuadPart;
370+
unsigned long long CPUKernelTime = buffer[i].KernelTime.QuadPart;
371+
unsigned long long CPUUserTime = buffer[i].UserTime.QuadPart;
365372

366373
unsigned long long CPUIdleTimeDiff = CPUIdleTime - g_CPUIdleTimeOld[i];
367374
unsigned long long CPUKernelTimeDiff = CPUKernelTime - g_CPUKernelTimeOld[i];
368-
unsigned long long CPUTotalTimeDiff = CPUTotalTime - g_CPUTotalTimeOld[i];
375+
unsigned long long CPUUserTimeDiff = CPUUserTime - g_CPUUserTimeOld[i];
369376

370-
if (CPUTotalTimeDiff != 0) {
377+
unsigned long long kernel_time_diff = CPUKernelTimeDiff - CPUIdleTimeDiff;
378+
unsigned long long total_time_diff = CPUKernelTimeDiff + CPUUserTimeDiff;
379+
if (total_time_diff != 0) {
371380
result.core[i].core = i;
372-
result.core[i].idle = static_cast<double>(((CPUIdleTimeDiff * 100) / CPUTotalTimeDiff));
373-
result.core[i].kernel = static_cast<double>(((CPUKernelTimeDiff * 100) / CPUTotalTimeDiff));
374-
result.core[i].total = 100.0 - result.core[i].idle;
381+
result.core[i].idle = get_rate(CPUIdleTimeDiff, total_time_diff);
382+
result.core[i].kernel = get_rate(kernel_time_diff, total_time_diff);
383+
result.core[i].total = 100 - result.core[i].idle;
375384
result.total.idle += result.core[i].idle;
376385
result.total.kernel += result.core[i].kernel;
377386
result.total.total += result.core[i].total;
@@ -380,16 +389,56 @@ system_info::cpu_load system_info::get_cpu_load() {
380389
result.core[i].kernel = 0;
381390
result.core[i].total = 0;
382391
}
383-
g_CPUTotalTimeOld[i] = CPUTotalTime;
384392
g_CPUIdleTimeOld[i] = CPUIdleTime;
385393
g_CPUKernelTimeOld[i] = CPUKernelTime;
394+
g_CPUUserTimeOld[i] = CPUUserTime;
386395
}
387396
result.total.idle /= result.cores;
388397
result.total.kernel /= result.cores;
389398
result.total.total /= result.cores;
390399
return result;
391400
}
392401

402+
/**
 * Sample the system-wide CPU load using GetSystemTimes().
 *
 * The load is computed from the difference between the current counters and the
 * previous sample stored in slot 0 of the g_CPU*TimeOld buffers, which are then
 * updated with the current counters. No per-core entries are produced: the
 * returned cpu_load has cores == 0 and an empty core list, only `total` is set.
 *
 * @return A cpu_load whose `total` holds idle, kernel and total percentages.
 * @throws nsclient::nsclient_exception if GetSystemTimes() fails.
 */
system_info::cpu_load system_info::get_cpu_load_total() {
  // NOTE(review): init_old_buffer presumably allocates the buffer on first use - confirm.
  init_old_buffer(g_CPUIdleTimeOld, 1);
  init_old_buffer(g_CPUUserTimeOld, 1);
  init_old_buffer(g_CPUKernelTimeOld, 1);

  FILETIME lpIdleTime;
  FILETIME lpKernelTime;
  FILETIME lpUserTime;

  if (GetSystemTimes(&lpIdleTime, &lpKernelTime, &lpUserTime) == 0) {
    throw nsclient::nsclient_exception("Failed to fetch cpu load");
  }

  // Combine the two 32-bit halves of each FILETIME into one 64-bit counter.
  unsigned long long CPUIdleTime = (static_cast<unsigned long long>(lpIdleTime.dwHighDateTime) << 32) | lpIdleTime.dwLowDateTime;
  unsigned long long CPUKernelTime = (static_cast<unsigned long long>(lpKernelTime.dwHighDateTime) << 32) | lpKernelTime.dwLowDateTime;
  unsigned long long CPUUserTime = (static_cast<unsigned long long>(lpUserTime.dwHighDateTime) << 32) | lpUserTime.dwLowDateTime;

  cpu_load result;
  result.cores = 0;
  result.core.resize(0);
  result.total.idle = result.total.kernel = result.total.total = 0.0;

  unsigned long long CPUIdleTimeDiff = CPUIdleTime - g_CPUIdleTimeOld[0];
  unsigned long long CPUKernelTimeDiff = CPUKernelTime - g_CPUKernelTimeOld[0];
  unsigned long long CPUUserTimeDiff = CPUUserTime - g_CPUUserTimeOld[0];

  // Kernel also includes idle time so we need to subtract that.
  // Clamp at zero so an inconsistent sample cannot wrap the unsigned subtraction.
  unsigned long long kernel_time_diff = CPUKernelTimeDiff > CPUIdleTimeDiff ? CPUKernelTimeDiff - CPUIdleTimeDiff : 0;
  unsigned long long used_time_diff = kernel_time_diff + CPUUserTimeDiff;
  unsigned long long total_time_diff = CPUKernelTimeDiff + CPUUserTimeDiff;

  // If no time elapsed since the previous sample, keep the zeroed result
  // instead of dividing by zero (mirrors the guard in the per-core variant).
  if (total_time_diff != 0) {
    result.total.idle = get_rate(CPUIdleTimeDiff, total_time_diff);
    result.total.kernel = get_rate(kernel_time_diff, total_time_diff);
    result.total.total = get_rate(used_time_diff, total_time_diff);
  }

  // Remember the current counters as the baseline for the next call.
  g_CPUIdleTimeOld[0] = CPUIdleTime;
  g_CPUKernelTimeOld[0] = CPUKernelTime;
  g_CPUUserTimeOld[0] = CPUUserTime;
  return result;
}
441+
393442
class CheckMemory {
394443
public:
395444
CheckMemory() : hKernel32(NULL), FEGlobalMemoryStatusEx(NULL), FEGlobalMemoryStatus(NULL) {

include/win_sysinfo/win_sysinfo.hpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,15 @@ struct system_info {
5454
double kernel;
5555
int core;
5656
load_entry() : idle(0.0), total(0.0), kernel(0.0), core(-1) {}
57-
void add(const load_entry &other) {
57+
load_entry(const load_entry& obj) : idle(obj.idle), total(obj.total), kernel(obj.kernel), core(obj.core) {}
58+
load_entry& operator=(const load_entry& obj) {
59+
idle = obj.idle;
60+
total = obj.total;
61+
kernel = obj.kernel;
62+
core = obj.core;
63+
return *this;
64+
}
65+
void add(const load_entry& other) {
5866
idle += other.idle;
5967
total += other.total;
6068
kernel += other.kernel;
@@ -71,7 +79,15 @@ struct system_info {
7179
std::vector<load_entry> core;
7280
load_entry total;
7381
cpu_load() : cores(0) {}
74-
void add(const cpu_load &n) {
82+
cpu_load(const cpu_load& obj) : cores(obj.cores), core(obj.core), total(obj.total) {}
83+
cpu_load& operator=(const cpu_load& obj) {
84+
cores = obj.cores;
85+
core = obj.core;
86+
total = obj.total;
87+
return *this;
88+
}
89+
90+
void add(const cpu_load& n) {
7591
total.add(n.total);
7692
cores = max(cores, n.cores);
7793
core.resize(cores);
@@ -81,7 +97,7 @@ struct system_info {
8197
}
8298
void normalize(double value) {
8399
total.normalize(value);
84-
for (load_entry &c : core) {
100+
for (load_entry& c : core) {
85101
c.normalize(value);
86102
}
87103
}
@@ -103,7 +119,7 @@ struct system_info {
103119
long long peak_usage;
104120
std::string name;
105121
pagefile_info(const std::string name = "") : size(0), usage(0), peak_usage(0), name(name) {}
106-
void add(const pagefile_info &other) {
122+
void add(const pagefile_info& other) {
107123
size += other.size;
108124
usage += other.usage;
109125
peak_usage += other.peak_usage;
@@ -114,14 +130,16 @@ struct system_info {
114130

115131
static std::string get_version_string();
116132
static unsigned long get_version();
117-
static OSVERSIONINFOEX *get_versioninfo();
133+
static OSVERSIONINFOEX* get_versioninfo();
118134
static long get_numberOfProcessorscores();
119135
static std::vector<std::string> get_suite_list();
120136
static long long get_suite_i();
121137

122-
static cpu_load get_cpu_load();
138+
static cpu_load get_cpu_load_per_core();
139+
static cpu_load get_cpu_load_total();
140+
123141
static memory_usage get_memory();
124-
static hlp::buffer<BYTE, windows::winapi::SYSTEM_PROCESS_INFORMATION *> get_system_process_information(int size = 0x32000);
142+
static hlp::buffer<BYTE, windows::winapi::SYSTEM_PROCESS_INFORMATION*> get_system_process_information(int size = 0x32000);
125143
};
126144

127145
namespace winapi {

modules/CheckSystem/CheckSystem.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,11 @@ bool CheckSystem::loadModuleEx(std::string alias, NSCAPI::moduleLoadMode mode) {
177177
("subsystem", sh::string_key(&collector->subsystem, "default"),
178178
"PDH subsystem", "Set which pdh subsystem to use.\nCurrently default and thread-safe are supported where thread-safe is slower but required if you have some problematic counters.", true)
179179

180-
("disable", sh::string_key(&collector->disable_, ""),
180+
("fetch core loads", sh::bool_key(&collector->read_core_load, "true"),
181+
"Fetch core load", "Set to false to use a different API for fetching CPU load (will not provide core load, and will not show exact same values as task manager).", true)
182+
183+
184+
("disable", sh::string_key(&collector->disable_, ""),
181185
"Disable automatic checks", "A comma separated list of checks to disable in the collector: cpu,handles,network,metrics,pdh. Please note disabling these will mean part of NSClient++ will no longer function as expected.", true)
182186
;
183187
;
@@ -559,17 +563,24 @@ void CheckSystem::check_cpu(const PB::Commands::QueryRequestMessage::Request &re
559563
modern_filter::data_container data;
560564
modern_filter::cli_helper<filter_type> filter_helper(request, response, data);
561565
std::vector<std::string> times;
566+
bool show_all_cores = false;
562567

563568
filter_type filter;
564569
filter_helper.add_options("load > 80", "load > 90", "core = 'total'", filter.get_filter_syntax(), "ignored");
565-
filter_helper.add_syntax("${status}: ${problem_list}", "${time}: ${load}%", "${core} ${time}", "", "%(status): CPU load is ok.");
570+
filter_helper.add_syntax("${status}: ${problem_list}", "${time}: ${load}%", "${core} ${time}", "", "%(status): CPU load is ok.");
566571
filter_helper.get_desc().add_options()
567-
("time", po::value<std::vector<std::string>>(&times), "The time to check")
572+
("time", po::value<std::vector<std::string>>(&times), "The time to check")
573+
("cores", boost::program_options::bool_switch(&show_all_cores),
574+
"This will remove the filter to include the cores, if you use filter dont use this as well.")
568575
;
569576

570577
if (!filter_helper.parse_options())
571578
return;
572579

580+
if (show_all_cores && filter_helper.data.filter_string.size() == 1) {
581+
filter_helper.data.filter_string.clear();
582+
}
583+
573584
if (times.empty()) {
574585
times.push_back("5m");
575586
times.push_back("1m");

modules/CheckSystem/pdh_thread.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,11 @@ void pdh_thread::thread_proc() {
282282
windows::system_info::cpu_load load;
283283
if (!disable_cpu) {
284284
try {
285-
load = windows::system_info::get_cpu_load();
285+
if (read_core_load) {
286+
load = windows::system_info::get_cpu_load_per_core();
287+
} else {
288+
load = windows::system_info::get_cpu_load_total();
289+
}
286290
} catch (...) {
287291
errors.push_back("Failed to get cpu load");
288292
}

modules/CheckSystem/pdh_thread.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,14 @@ class pdh_thread {
6969
network_check::network_data network;
7070
public:
7171

72+
bool read_core_load;
7273
std::string subsystem;
7374
std::string disable_;
7475
std::string default_buffer_size;
7576

7677
public:
7778

78-
pdh_thread(nscapi::core_wrapper *core, int plugin_id) : core(core), plugin_id(plugin_id) {
79+
pdh_thread(nscapi::core_wrapper *core, int plugin_id) : core(core), plugin_id(plugin_id), read_core_load(true) {
7980
mutex_.lock();
8081
}
8182
void add_counter(const PDH::pdh_object &counter);

0 commit comments

Comments
 (0)