-
Notifications
You must be signed in to change notification settings - Fork 269
Description
[2025-05-13 12:52:14,713 main.py:237 INFO] Detected system ID: B200-SXM-180GBx8
[2025-05-13 12:52:14,873 harness.py:239 INFO] The harness will load 2 plugins: ['build/plugins/NMSOptPlugin/libnmsoptplugin.so', 'build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so']
[2025-05-13 12:52:14,874 generate_conf_files.py:107 INFO] Generated measurements/ entries for B200-SXM-180GBx8_TRT/retinanet/Offline
[2025-05-13 12:52:14,874 harness.py:355 INFO] Using harness launch command...
[2025-05-13 12:52:14,874 init.py:46 INFO] Running command: ./build/bin/harness_default --plugins="build/plugins/NMSOptPlugin/libnmsoptplugin.so,build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so" --logfile_outdir="/work/inference_results_v5.0/closed/NVIDIA/build/logs/2025.05.13-12.52.04/B200-SXM-180GBx8_TRT/retinanet/Offline" --logfile_prefix="mlperf_log_" --performance_sample_count=64 --test_mode="AccuracyOnly" --gpu_inference_streams=2 --run_infer_on_copy_streams=false --gpu_batch_size=48 --map_path="data_maps/open-images-v6-mlperf/val_map.txt" --tensor_path="build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear" --use_graphs=false --gpu_engine_batch_size="48" --gpu_engines="./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan" --mlperf_conf_path="build/loadgen-configs/B200-SXM-180GBx8_TRT/retinanet/Offline/mlperf.conf" --user_conf_path="build/loadgen-configs/B200-SXM-180GBx8_TRT/retinanet/Offline/user.conf" --max_dlas=0 --scenario="Offline" --model="retinanet" --scenario Offline --model retinanet --response_postprocess openimageeffnms
[2025-05-13 12:52:14,874 init.py:53 INFO] Overriding Environment
benchmark : Benchmark.Retinanet
buffer_manager_thread_count : 0
data_dir : /work/Scratch_space/data
gpu_batch_size : 48
gpu_inference_streams : 2
input_dtype : int8
input_format : linear
log_dir : /work/inference_results_v5.0/closed/NVIDIA/build/logs/2025.05.13-12.52.04
map_path : data_maps/open-images-v6-mlperf/val_map.txt
offline_expected_qps : 13600
precision : int8
preprocessed_data_dir : /work/Scratch_space/preprocessed_data
run_infer_on_copy_streams : False
scenario : Scenario.Offline
system : System(cpu=CPU(name='INTEL(R) XEON(R) PLATINUM 8592V', architecture=<CPUArchitecture.x86_64: AliasedName(name='x86_64', aliases=(), patterns=())>, vendor='GenuineIntel', cores_per_group=64, threads_per_core=1, n_groups=2, group_type=<GroupType.Socket: 'socket'>, numa_nodes=[[Interval(start=0, end=63)], [Interval(start=64, end=127)]], flags={'arat', 'popcnt', 'mtrr', 'avx512_bf16', 'ssbd', 'avx', 'dtherm', 'pcid', 'sse2', 'avx512_vpopcntdq', 'fpu', 'fsgsbase', 'ept_ad', 'movdir64b', 'cldemote', 'avx512f', 'cx16', 'avx512vl', 'bus_lock_detect', 'avx512_vnni', '3dnowprefetch', 'aes', 'cpuid_fault', 'la57', 'avx512_fp16', 'ibrs', 'vme', 'pconfig', 'dts', 'ds_cpl', 'monitor', 'flexpriority', 'nopl', 'umip', 'rep_good', 'vpclmulqdq', 'wbnoinvd', 'tsc_deadline_timer', 'de', 'gfni', 'pdcm', 'constant_tsc', 'tsc', 'tsc_known_freq', 'pku', 'intel_pt', 'acpi', 'tm', 'flush_l1d', 'xsaveopt', 'pse36', 'pae', 'pdpe1gb', 'ssse3', 'mce', 'sep', 'pse', 'dtes64', 'arch_lbr', 'clwb', 'sse', 'avx_vnni', 'arch_perfmon', 'cpuid', 'sse4_1', 'bts', 'fsrm', 'xsaves', 'pge', 'fma', 'hfi', 'cdp_l2', 'rdtscp', 'cdp_l3', 'amx_tile', 'avx512_bitalg', 'cx8', 'serialize', 'erms', 'vaes', 'smap', 'mca', 'avx512bw', 'pat', 'vmx', 'xtpr', 'xsavec', 'mmx', 'clflushopt', 'xgetbv1', 'enqcmd', 'avx512cd', 'intel_ppin', 'aperfmperf', 'sha_ni', 'bmi1', 'mba', 'user_shstk', 'rdt_a', 'sdbg', 'xtopology', 'smx', 'tm2', 'cat_l3', 'adx', 'vpid', 'rdpid', 'avx512dq', 'cat_l2', 'epb', 'amx_bf16', 'nx', 'vnmi', 'est', 'ospke', 'tpr_shadow', 'rdrand', 'rdseed', 'invpcid', 'cqm', 'smep', 'cqm_occup_llc', 'avx512vbmi', 'cqm_mbm_local', 'cmov', 'ss', 'pts', 'md_clear', 'amx_int8', 'sse4_2', 'lahf_lm', 'dca', 'ida', 'pni', 'x2apic', 'pclmulqdq', 'art', 'split_lock_detect', 'fxsr', 'pln', 'syscall', 'cqm_mbm_total', 'pbe', 'ibt', 'avx512_vbmi2', 'f16c', 'bmi2', 'msr', 'apic', 'movbe', 'abm', 'ept', 'lm', 'nonstop_tsc', 'waitpkg', 'arch_capabilities', 'tme', 'cqm_llc', 'movdiri', 'ibrs_enhanced', 'stibp', 'ht', 
'pebs', 'ibpb', 'tsc_adjust', 'avx2', 'tsxldtrk', 'avx512ifma', 'clflush', 'xsave'}, vulnerabilities={'Spec store bypass': 'Vulnerable', 'Spectre v1': 'Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers', 'Spectre v2': 'Vulnerable; IBPB: disabled; STIBP: disabled; PBRSB-eIBRS: Vulnerable; BHI: Vulnerable'}), host_memory=HostMemory(capacity=Memory(quantity=2.1134421039999998, byte_suffix=<ByteSuffix.TB: (1000, 4)>, _num_bytes=2113442104000)), accelerators={<class 'nvmitten.nvidia.accelerator.GPU'>: [GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=0), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=1), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=2), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=3), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=4), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), 
max_power_limit=1000.0, is_integrated=False, gpu_index=5), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=6), GPU(name='NVIDIA B200', pci_id='0x290110DE', compute_sm=ComputeSM(major=10, minor=0), vram=Memory(quantity=178.3616943359375, byte_suffix=<ByteSuffix.GiB: (1024, 3)>, _num_bytes=191514411008), max_power_limit=1000.0, is_integrated=False, gpu_index=7)], <class 'nvmitten.nvidia.accelerator.DLA'>: []}, extras={'id': 'B200-SXM-180GBx8', 'tags': {'multi_gpu', 'gpu_based', 'start_from_device_enabled'}, 'name': 'B200_SXM_180GBx8', 'primary_compute_sm': ComputeSM(major=10, minor=0)})
tensor_path : build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear
test_mode : AccuracyOnly
use_graphs : False
system_id : B200-SXM-180GBx8
config_name : B200-SXM-180GBx8_retinanet_Offline
workload_setting : WorkloadSetting(HarnessType.LWIS, AccuracyTarget.k_99, PowerSetting.MaxP)
optimization_level : plugin-enabled
num_profiles : 1
config_ver : lwis_k_99_MaxP
accuracy_level : 99%
inference_server : lwis
skip_file_checks : False
power_limit : None
cpu_freq : None
gpu_engine_batch_size : 48
&&&& RUNNING Default_Harness # ./build/bin/harness_default
[I] mlperf.conf path: build/loadgen-configs/B200-SXM-180GBx8_TRT/retinanet/Offline/mlperf.conf
[I] user.conf path: build/loadgen-configs/B200-SXM-180GBx8_TRT/retinanet/Offline/user.conf
Creating QSL.
Finished Creating QSL.
Setting up SUT.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:0.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:1.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:2.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:3.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:4.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:5.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:6.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] Loaded engine size: 39 MiB
[I] Device:7.GPU: [0] ./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan has been successfully loaded.
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 26354 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 52405 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 78456 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 104508 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 130559 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 156610 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 182662 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 208713 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 234764 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 260816 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 286867 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 312918 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 338970 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 365021 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 391072 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 417124 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 443175 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 469226 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 495278 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 521329 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 547380 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 573432 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 599483 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 625534 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 651586 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 677637 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 703688 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 729740 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 755791 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 781842 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26052, now: CPU 0, GPU 807894 (MiB)
[I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26051, now: CPU 0, GPU 833945 (MiB)
[I] Creating batcher thread: 0 EnableBatcherThreadPerDevice: false
Finished setting up SUT.
Starting warmup. Running for a minimum of 5 seconds.
[F] [TRT] Assertion failed: status == STATUS_SUCCESS
/work/inference_results_v5.0/closed/NVIDIA/code/plugin/NMSOptPlugin/src/nmsPluginOpt.cpp:221
Aborting...
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/work/inference_results_v5.0/closed/NVIDIA/code/main.py", line 239, in <module>
main(main_args, DETECTED_SYSTEM)
File "/work/inference_results_v5.0/closed/NVIDIA/code/main.py", line 144, in main
dispatch_action(main_args, config_dict, workload_setting)
File "/work/inference_results_v5.0/closed/NVIDIA/code/main.py", line 210, in dispatch_action
handler.run()
File "/work/inference_results_v5.0/closed/NVIDIA/code/actionhandler/base.py", line 81, in run
self.handle_failure()
File "/work/inference_results_v5.0/closed/NVIDIA/code/actionhandler/run_harness.py", line 199, in handle_failure
raise RuntimeError("Run harness failed!")
RuntimeError: Run harness failed!
Traceback (most recent call last):
File "/work/inference_results_v5.0/closed/NVIDIA/code/actionhandler/run_harness.py", line 164, in handle
result_data = self.harness.run_harness(flag_dict=self.harness_flag_dict, skip_generate_measurements=True, use_py_harness=self.use_py_harness)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/work/inference_results_v5.0/closed/NVIDIA/code/common/harness.py", line 356, in run_harness
output = run_command(self.construct_terminal_command(argstr),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/work/inference_results_v5.0/closed/NVIDIA/code/common/init.py", line 67, in run_command
raise subprocess.CalledProcessError(ret, cmd)
subprocess.CalledProcessError: Command './build/bin/harness_default --plugins="build/plugins/NMSOptPlugin/libnmsoptplugin.so,build/plugins/retinanetConcatPlugin/libretinanetconcatplugin.so" --logfile_outdir="/work/inference_results_v5.0/closed/NVIDIA/build/logs/2025.05.13-12.52.04/B200-SXM-180GBx8_TRT/retinanet/Offline" --logfile_prefix="mlperf_log" --performance_sample_count=64 --test_mode="AccuracyOnly" --gpu_inference_streams=2 --run_infer_on_copy_streams=false --gpu_batch_size=48 --map_path="data_maps/open-images-v6-mlperf/val_map.txt" --tensor_path="build/preprocessed_data/open-images-v6-mlperf/validation/Retinanet/int8_linear" --use_graphs=false --gpu_engine_batch_size="48" --gpu_engines="./build/engines/B200-SXM-180GBx8/retinanet/Offline/retinanet-Offline-gpu-retinanet-b48-int8.lwis_k_99_MaxP.plan" --mlperf_conf_path="build/loadgen-configs/B200-SXM-180GBx8_TRT/retinanet/Offline/mlperf.conf" --user_conf_path="build/loadgen-configs/B200-SXM-180GBx8_TRT/retinanet/Offline/user.conf" --max_dlas=0 --scenario="Offline" --model="retinanet" --scenario Offline --model retinanet --response_postprocess openimageeffnms' died with <Signals.SIGABRT: 6>.
Reaping losing child 0x5555555f4470 PID 27741
make: *** [Makefile:53: run_harness] Error 1
Removing child 0x5555555f4470 PID 27741 from chain.