diff --git a/integtest/3ru_1df_multirun_test.py b/integtest/3ru_1df_multirun_test.py index c4b5113..7c6725a 100644 --- a/integtest/3ru_1df_multirun_test.py +++ b/integtest/3ru_1df_multirun_test.py @@ -9,9 +9,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. +import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 3 number_of_readout_apps = 3 @@ -25,8 +31,6 @@ check_for_logfile_errors = True expected_event_count = trigger_rate * run_duration expected_event_count_tolerance = math.ceil(expected_event_count / 10) -minimum_cpu_count = 18 -minimum_free_memory_gb = 24 wibeth_frag_params = { "fragment_type_description": "WIBEth", @@ -90,18 +94,15 @@ ], } -# Determine if this computer is powerful enough for these tests -sufficient_resources_on_this_computer = True -cpu_count = os.cpu_count() -hostname = os.uname().nodename -mem_obj = psutil.virtual_memory() -free_mem = round((mem_obj.available / (1024 * 1024 * 1024)), 2) -total_mem = round((mem_obj.total / (1024 * 1024 * 1024)), 2) -print( - f"DEBUG: CPU count is {cpu_count}, free and total memory are {free_mem} GB and {total_mem} GB." -) -if cpu_count < minimum_cpu_count or free_mem < minimum_free_memory_gb: - sufficient_resources_on_this_computer = False +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(60) # 3 for each data source (incl TPG) plus 3 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(32) # double what we observe being used ('free -h') +resval.require_total_memory_gb(64) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") # The next three variable declarations *must* be present as globals in the test # file. They're read by the "fixtures" in conftest.py to determine how @@ -158,9 +159,24 @@ "WIBEth_System": conf_dict, "Software_TPG_System": swtpg_conf, } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. 
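
# Worked example of the parametrization collapse performed just below, using the two
# configuration names from this file and string placeholders for the actual config objects:

confgen_arguments = {
    "WIBEth_System": "wibeth-config-placeholder",
    "Software_TPG_System": "swtpg-config-placeholder",
}
all_encompassing_dummy_key = ",".join(confgen_arguments.keys())
first_config = next(iter(confgen_arguments.values()))
confgen_arguments = {all_encompassing_dummy_key: first_config}
print(confgen_arguments)
# {'WIBEth_System,Software_TPG_System': 'wibeth-config-placeholder'}
# Because "-k" matches substrings of the test id, a selection such as
# "pytest -k Software_TPG_System" still selects the single remaining (dummy) case.
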
+if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } # The commands to run in nanorc, as a list -if sufficient_resources_on_this_computer: +if resval.this_computer_has_sufficient_resources: nanorc_command_list = "boot conf".split() nanorc_command_list += ( "start --run-number 101 wait 5 enable-triggers wait ".split() @@ -179,16 +195,17 @@ ) nanorc_command_list += "scrap terminate".split() else: - nanorc_command_list = ["boot", "terminate"] + nanorc_command_list = ["wait", "1"] # The tests themselves - -def test_nanorc_success(run_nanorc): - if not sufficient_resources_on_this_computer: - pytest.skip( - f"This computer ({hostname}) does not have enough resources to run this test." - ) +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") @@ -205,10 +222,9 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): - if not sufficient_resources_on_this_computer: - pytest.skip( - f"This computer ({hostname}) does not have enough resources to run this test." - ) + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") if check_for_logfile_errors: # Check that there are no warnings or errors in the log files @@ -218,19 +234,9 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): - if not sufficient_resources_on_this_computer: - print( - f"This computer ({hostname}) does not have enough resources to run this test." - ) - print( - f" (CPU count is {cpu_count}, free and total memory are {free_mem} GB and {total_mem} GB.)" - ) - print( - f" (Minimum CPU count is {minimum_cpu_count} and minimum free memory is {minimum_free_memory_gb} GB.)" - ) - pytest.skip( - f"This computer ({hostname}) does not have enough resources to run this test." - ) + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance diff --git a/integtest/3ru_3df_multirun_test.py b/integtest/3ru_3df_multirun_test.py index a71e0cb..415a9b8 100644 --- a/integtest/3ru_3df_multirun_test.py +++ b/integtest/3ru_3df_multirun_test.py @@ -7,9 +7,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. 
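
# The ResourceValidator class used in these tests is provided by
# integrationtest.resource_validation and is not part of this diff. As a rough, hypothetical
# sketch (based only on the call sites in these files, not on the real implementation), the
# interface the tests rely on could look like the following:

import os
import shutil
import psutil  # already a dependency of these tests before this change

GB = 1024 * 1024 * 1024

class ResourceValidator:
    def __init__(self):
        self._failures = []
        self._debug = []

    @property
    def this_computer_has_sufficient_resources(self):
        return not self._failures

    def _check(self, description, actual, minimum):
        self._debug.append(f"{description} is {actual:.1f} (minimum {minimum})")
        if actual < minimum:
            self._failures.append(f"{description} ({actual:.1f}) is below the minimum of {minimum}")

    def require_cpu_count(self, minimum):
        self._check("CPU count", float(os.cpu_count()), minimum)

    def require_free_memory_gb(self, minimum):
        self._check("free memory (GB)", psutil.virtual_memory().available / GB, minimum)

    def require_total_memory_gb(self, minimum):
        self._check("total memory (GB)", psutil.virtual_memory().total / GB, minimum)

    def require_free_disk_space_gb(self, path, minimum):
        self._check(f"free disk space on {path} (GB)", shutil.disk_usage(path).free / GB, minimum)

    def require_total_disk_space_gb(self, path, minimum):
        self._check(f"total disk space on {path} (GB)", shutil.disk_usage(path).total / GB, minimum)

    def get_debug_string(self):
        return "DEBUG: " + "; ".join(self._debug)

    def get_insufficient_resources_report(self):
        hostname = os.uname().nodename
        return (f"This computer ({hostname}) does not have enough resources "
                "to run this test: " + "; ".join(self._failures))

    def get_insufficient_resources_summary(self):
        return f"insufficient resources on this computer ({os.uname().nodename})"
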
+import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 2 number_of_readout_apps = 3 @@ -111,6 +117,16 @@ # More information is provided about each of these below [coming soon!]. # +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(44) # 3 for each data source (incl TPG) plus 3 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(24) # double what we observe being used ('free -h') +resval.require_total_memory_gb(48) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + # 29-Dec-2025, KAB: The following comment about three variables is out-of-date. # It will be replaced soon, and the comment block above is a start on that. # @@ -165,6 +181,22 @@ "Software_TPG_System": swtpg_conf, } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. +if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } + # 29-Dec-2025, KAB: added sample process manager choices. 
process_manager_choices = { "StandAloneSSH_PM" : {"pm_type": "ssh-standalone"}, @@ -172,28 +204,38 @@ } # The commands to run in nanorc, as a list -nanorc_command_list = "boot conf".split() -nanorc_command_list += ( - "start --run-number 101 wait 5 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() -) -nanorc_command_list += ( - "start --run-number 102 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() -) -nanorc_command_list += ( - "start --run-number 103 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() -) -nanorc_command_list += "scrap terminate".split() +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = "boot conf".split() + nanorc_command_list += ( + "start --run-number 101 wait 5 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() + ) + nanorc_command_list += ( + "start --run-number 102 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() + ) + nanorc_command_list += ( + "start --run-number 103 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() + ) + nanorc_command_list += "scrap terminate".split() +else: + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -209,6 +251,10 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( @@ -217,6 +263,10 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance low_number_of_files = expected_number_of_data_files diff --git a/integtest/example_system_test.py b/integtest/example_system_test.py index bd93fc9..01e5ff0 100755 --- a/integtest/example_system_test.py +++ b/integtest/example_system_test.py @@ -9,9 +9,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import 
integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. +import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions run_duration = 20 # seconds @@ -71,6 +77,16 @@ ] } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(60) # 3 for each data source (incl TPG) plus 6 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(24) # double what we observe being used ('free -h') +resval.require_total_memory_gb(48) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + # The arguments to pass to the config generator, excluding the json # output directory (the test framework handles that) @@ -122,18 +138,43 @@ def host_is_at_ehn1(hostname): "Local 2x3 Conf": twobythree_local_conf, } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. 
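
# Each test_nanorc_success below extracts the configuration name from the
# PYTEST_CURRENT_TEST environment variable. A self-contained illustration of that regular
# expression, using a made-up node id in pytest's usual "<nodeid> (<stage>)" format
# (the exact id shape may differ):

import re

current_test = "example_system_test.py::test_nanorc_success[Local 2x3 Conf-run_nanorc0] (call)"
match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test)
if match_obj:
    print(f"Running configuration: {match_obj.group(1)}")
# -> Running configuration: Local 2x3 Conf
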
+if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } # The commands to run in nanorc, as a list -nanorc_command_list = ( - "boot wait 2 conf start --run-number 101 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop scrap terminate".split() -) +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = ( + "boot wait 2 conf start --run-number 101 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop scrap terminate".split() + ) +else: + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -154,15 +195,17 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): - current_test = os.environ.get("PYTEST_CURRENT_TEST") + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + current_test = os.environ.get("PYTEST_CURRENT_TEST") if not host_is_at_ehn1(hostname) and "EHN1" in current_test: pytest.skip( f"This computer ({hostname}) is not at EHN1, not running EHN1 sessions" ) session_name = run_nanorc.session_name if run_nanorc.session_name is not None else run_nanorc.session - if host_is_at_ehn1(hostname) and "EHN1" in current_test: log_dir = pathlib.Path("/log") run_nanorc.log_files += list(log_dir.glob(f"log_*_{session_name}*.txt")) @@ -190,8 +233,11 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): - current_test = os.environ.get("PYTEST_CURRENT_TEST") + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + current_test = os.environ.get("PYTEST_CURRENT_TEST") if not host_is_at_ehn1(hostname) and "EHN1" in current_test: pytest.skip( f"This computer ({hostname}) is not at EHN1, not running EHN1 sessions" diff --git a/integtest/fake_data_producer_test.py b/integtest/fake_data_producer_test.py index 6653dd3..c5dedd2 100644 --- a/integtest/fake_data_producer_test.py +++ b/integtest/fake_data_producer_test.py @@ -8,9 +8,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. 
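
# The print() rebinding repeated just below (and near the top of each modified file) only
# changes the default value of print's flush argument. A standalone illustration using
# nothing beyond the standard library:

import functools

print = functools.partial(print, flush=True)   # every call now defaults to flush=True

# Useful when stdout is a pipe (as it is under pytest or when output is redirected to a
# log file): module-level messages such as the resource-check debug string appear
# immediately instead of sitting in the stdio buffer.
print("this line is flushed as soon as it is printed")

# Individual calls can still override the default if buffering is ever wanted:
print("this one is buffered as usual", flush=False)
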
+import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions run_duration = 20 # seconds # baseline_fragment_size_bytes=72+(464*81) # 81 frames of 464 bytes each with 72-byte Fragment header # ProtoWIB @@ -46,6 +52,16 @@ ], } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(8) # 4 for everything with safety factor of 2 +resval.require_free_memory_gb(4) # double what we observe being used ('free -h') +resval.require_total_memory_gb(8) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + # The next three variable declarations *must* be present as globals in the test # file. They're read by the "fixtures" in conftest.py to determine how # to run the config generation and nanorc @@ -84,30 +100,55 @@ "Baseline_Window_Size": conf_dict, "Double_Window_Size": doublewindow_conf, } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. 
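
# The skip pattern used in the test functions below prints a full report outside pytest's
# output capture (so it is visible without -s) and then skips with a short summary. A
# minimal, self-contained sketch of that pattern; the resource check and the messages
# here are stand-ins for the ResourceValidator calls:

import pytest

def test_resource_skip_pattern(capsys):
    # Stand-in for resval.this_computer_has_sufficient_resources
    sufficient_resources = False
    if not sufficient_resources:
        # Stand-in for resval.get_insufficient_resources_report()
        report = "CPU count 6 is below the minimum of 8; free memory 2.0 GB is below the minimum of 4 GB"
        with capsys.disabled():
            # Written straight to the terminal, bypassing pytest's output capture.
            print(f"\n\N{LARGE YELLOW CIRCLE} {report}")
        # The skip reason becomes the short summary shown in pytest's result line.
        pytest.skip("insufficient resources on this computer")
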
+if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } + # The commands to run in nanorc, as a list -nanorc_command_list = "boot conf".split() -nanorc_command_list += ( +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = "boot conf".split() + nanorc_command_list += ( "start --run-number 101 wait 5 enable-triggers wait ".split() + [str(run_duration)] + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() ) -nanorc_command_list += ( + nanorc_command_list += ( "start --run-number 102 wait 1 enable-triggers wait ".split() + [str(run_duration)] + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() ) -nanorc_command_list += ( + nanorc_command_list += ( "start --run-number 103 wait 1 enable-triggers wait ".split() + [str(run_duration)] + "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split() ) -nanorc_command_list += "scrap terminate".split() + nanorc_command_list += "scrap terminate".split() +else: + nanorc_command_list = ["wait", "1"] # The tests themselves +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") -def test_nanorc_success(run_nanorc): # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -123,9 +164,11 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): - local_check_flag = check_for_logfile_errors + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") - if local_check_flag: + if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( run_nanorc.log_files, True, True, ignored_logfile_problems @@ -133,6 +176,10 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance frag_params = wibeth_frag_params diff --git a/integtest/long_window_readout_test.py b/integtest/long_window_readout_test.py index 41eb63a..218401f 100644 --- a/integtest/long_window_readout_test.py +++ b/integtest/long_window_readout_test.py @@ -1,3 +1,17 @@ +# The goal of this test is to verify that triggers that have long readout windows are +# handled correctly by the system, including the splitting of the resulting "trigger record" +# into a "sequence" of TriggerRecords. +# +# This test requires a non-trivial amount of disk space to write its raw data files, +# and there are safety checks to verify that sufficient space is available for these files. 
+# In addition, the raw data files that are produced are removed at the end of the test +# so that they don't fill up the available space. +# *** If you are running on a computer that does not have sufficient space in /tmp, and you would +# like to instead use a directory on a disk that *does* have sufficient space, you can specify +# a non-standard pytest output directory using the "--tmpdir " to the +# dunedaq_integtest_bundle.sh script. (This test will clean up the large data files that are +# produced independent of which output directory is used.) +# import pytest import os import re @@ -8,6 +22,7 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" @@ -30,10 +45,6 @@ tr_queue_size = token_count * (readout_window_time_before + readout_window_time_after) / trigger_record_sequence_length / number_of_dataflow_apps latency_buffer_size = 600000 data_rate_slowdown_factor = 1 -minimum_cpu_count = 24 -minimum_free_memory_gb = 52 -minimum_total_disk_space_gb = 32 # double what we need -minimum_free_disk_space_gb = 24 # 50% more than what we need # Default values for validation parameters expected_number_of_data_files = 4 * number_of_dataflow_apps @@ -66,33 +77,16 @@ ], } -# Determine if the conditions are right for these tests -sufficient_disk_space = True -actual_output_path = output_path_parameter -if output_path_parameter == ".": - actual_output_path = get_pytest_tmpdir() -disk_space = shutil.disk_usage(actual_output_path) -total_disk_space_gb = disk_space.total / (1024 * 1024 * 1024) -free_disk_space_gb = disk_space.free / (1024 * 1024 * 1024) -print( - f"DEBUG: Space on disk for output path {actual_output_path}: total = {total_disk_space_gb} GB and free = {free_disk_space_gb} GB." -) -if ( - total_disk_space_gb < minimum_total_disk_space_gb - or free_disk_space_gb < minimum_free_disk_space_gb -): - sufficient_disk_space = False -sufficient_resources_on_this_computer = True -cpu_count = os.cpu_count() -hostname = os.uname().nodename -mem_obj = psutil.virtual_memory() -free_mem = round((mem_obj.available / (1024 * 1024 * 1024)), 2) -total_mem = round((mem_obj.total / (1024 * 1024 * 1024)), 2) -print( - f"DEBUG: CPU count is {cpu_count}, free and total memory are {free_mem} GB and {total_mem} GB." -) -if cpu_count < minimum_cpu_count or free_mem < minimum_free_memory_gb: - sufficient_resources_on_this_computer = False +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(60) # 2 for each data source plus 6 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(87) # 50% more than what we observe being used ('free -h') +resval.require_total_memory_gb(116) # double what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 25) # 25% more than what we need +resval.require_total_disk_space_gb(actual_output_path, 40) # double what we need +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") # The next three variable declarations *must* be present as globals in the test # file. 
They're read by the "fixtures" in conftest.py to determine how @@ -178,7 +172,7 @@ } # The commands to run in nanorc, as a list -if sufficient_disk_space and sufficient_resources_on_this_computer: +if resval.this_computer_has_sufficient_resources: nanorc_command_list = "boot conf".split() nanorc_command_list += ( "start --trigger-rate ".split() @@ -196,20 +190,18 @@ ) nanorc_command_list += "scrap terminate".split() else: - nanorc_command_list = ["boot", "terminate"] + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): - if not sufficient_resources_on_this_computer: - pytest.skip( - f"This computer ({hostname}) does not have enough resources to run this test." - ) - if not sufficient_disk_space: - pytest.skip( - f"The raw data output path ({actual_output_path}) does not have enough space to run this test." - ) +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") @@ -226,14 +218,9 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): - if not sufficient_resources_on_this_computer: - pytest.skip( - f"This computer ({hostname}) does not have enough resources to run this test." - ) - if not sufficient_disk_space: - pytest.skip( - f"The raw data output path ({actual_output_path}) does not have enough space to run this test." - ) + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") if check_for_logfile_errors: # Check that there are no warnings or errors in the log files @@ -243,33 +230,9 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): - if not sufficient_resources_on_this_computer: - print( - f"This computer ({hostname}) does not have enough resources to run this test." - ) - print( - f" (CPU count is {cpu_count}, free and total memory are {free_mem} GB and {total_mem} GB.)" - ) - print( - f" (Minimum CPU count is {minimum_cpu_count} and minimum free memory is {minimum_free_memory_gb} GB.)" - ) - pytest.skip( - f"This computer ({hostname}) does not have enough resources to run this test." - ) - - if not sufficient_disk_space: - print( - f"The raw data output path ({actual_output_path}) does not have enough space to run this test." - ) - print( - f" (Free and total space are {free_disk_space_gb} GB and {total_disk_space_gb} GB.)" - ) - print( - f" (Minimums are {minimum_free_disk_space_gb} GB and {minimum_total_disk_space_gb} GB.)" - ) - pytest.skip( - f"The raw data output path ({actual_output_path}) does not have enough space to run this test." - ) + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance @@ -303,10 +266,9 @@ def test_data_files(run_nanorc): def test_cleanup(run_nanorc): - if not sufficient_disk_space: - pytest.skip( - f"The raw data output path ({actual_output_path}) does not have enough space to run this test." 
- ) + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") pathlist_string = "" filelist_string = "" diff --git a/integtest/minimal_system_quick_test.py b/integtest/minimal_system_quick_test.py index fff5d9b..923eecc 100644 --- a/integtest/minimal_system_quick_test.py +++ b/integtest/minimal_system_quick_test.py @@ -6,10 +6,16 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation import integrationtest.opmon_metric_checks as opmon_metric_checks +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. +import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 2 data_rate_slowdown_factor = 1 # 10 for ProtoWIB/DuneWIB @@ -53,6 +59,16 @@ ], } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(12) # 2 for each data source plus 2 more for everything else; safety factor of 2 +resval.require_free_memory_gb(8) # double what we observe being used ('free -h') +resval.require_total_memory_gb(16) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + # The next three variable declarations *must* be present as globals in the test # file. 
They're read by the "fixtures" in conftest.py to determine how # to run the config generation and nanorc @@ -94,16 +110,26 @@ confgen_arguments = {"MinimalSystem": conf_dict} # The commands to run in nanorc, as a list -nanorc_command_list = ( - "boot conf start --run-number 101 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop scrap terminate".split() -) +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = ( + "boot conf start --run-number 101 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop scrap terminate".split() + ) +else: + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -119,6 +145,9 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") # Check that at least some of the expected log files are present assert any( @@ -143,6 +172,10 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + # Run some tests on the output data file all_ok = len(run_nanorc.data_files) == expected_number_of_data_files print("") # Clear potential dot from pytest @@ -177,6 +210,10 @@ def test_data_files(run_nanorc): # 26-Nov-2025, KAB: added some sample opmon metric checks, for demonstration purposes def test_metric_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + print("") # Clear potential dot from pytest # 10-Dec-2025, KAB: we have noticed that sometimes drunc transitions (or other parts of diff --git a/integtest/readout_type_scan_test.py b/integtest/readout_type_scan_test.py index a24e37c..17099fe 100644 --- a/integtest/readout_type_scan_test.py +++ b/integtest/readout_type_scan_test.py @@ -7,9 +7,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. 
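
# The free-disk-space requirement (most important for long_window_readout_test.py above,
# which writes a sizeable amount of raw data) ultimately comes down to a
# shutil.disk_usage() check on the pytest output directory. A small sketch of such a
# check, assuming only the standard library; the real logic lives in ResourceValidator
# and get_pytest_tmpdir():

import shutil

def free_disk_space_gb(path):
    """Free space, in GiB, on the filesystem that contains 'path'."""
    return shutil.disk_usage(path).free / (1024 * 1024 * 1024)

# Hypothetical output directory; the tests obtain the real one from get_pytest_tmpdir().
output_path = "/tmp"
if free_disk_space_gb(output_path) < 25:   # long_window_readout_test.py asks for 25 GB free
    print(f"Not enough free space under {output_path} for the long-window readout test.")
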
+import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 2 run_duration = 20 # seconds @@ -134,6 +140,16 @@ ], } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(16) # 3 for each data source (incl TPG) plus 2 more for everything else; safety factor of 2 +resval.require_free_memory_gb(10) # double what we observe being used ('free -h') +resval.require_total_memory_gb(20) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + # The next three variable declarations *must* be present as globals in the test # file. They're read by the "fixtures" in conftest.py to determine how # to run the config generation and nanorc @@ -282,19 +298,44 @@ "BernCRT_System": bern_crt_conf, "GrenobleCRT_System": grenoble_crt_conf } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. 
+if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } # The commands to run in nanorc, as a list -nanorc_command_list = ( - "boot conf start --run-number 101 wait 2 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow stop-trigger-sources wait 2 stop scrap terminate".split() -) -# + "disable-triggers wait 5 drain-dataflow wait 2 stop-trigger-sources wait 2 stop scrap terminate".split() +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = ( + "boot conf start --run-number 101 wait 2 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow stop-trigger-sources wait 2 stop scrap terminate".split() + ) + # + "disable-triggers wait 5 drain-dataflow wait 2 stop-trigger-sources wait 2 stop scrap terminate".split() +else: + nanorc_command_list = ["wait", "1"] + # The tests themselves +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") -def test_nanorc_success(run_nanorc): # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -310,9 +351,11 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): - local_check_flag = check_for_logfile_errors + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") - if local_check_flag: + if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( run_nanorc.log_files, True, True, ignored_logfile_problems @@ -320,6 +363,10 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance fragment_check_list = [triggercandidate_frag_params] diff --git a/integtest/small_footprint_quick_test.py b/integtest/small_footprint_quick_test.py index c930dc9..9fb36ec 100644 --- a/integtest/small_footprint_quick_test.py +++ b/integtest/small_footprint_quick_test.py @@ -6,9 +6,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. 
+import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 1 data_rate_slowdown_factor = 1 # 10 for ProtoWIB/DuneWIB @@ -53,6 +59,16 @@ ], } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(8) # 2 for each data source plus 2 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(6) # double what we observe being used ('free -h') +resval.require_total_memory_gb(12) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + # The next three variable declarations *must* be present as globals in the test # file. They're read by the "fixtures" in conftest.py to determine how # to run the config generation and nanorc @@ -79,26 +95,35 @@ conf_dict.config_substitutions.append( data_classes.attribute_substitution(obj_class="LatencyBuffer", updates={"size": 50000}) ) - conf_dict.config_substitutions.append( data_classes.attribute_substitution( obj_class="FakeHSIEventGeneratorConf", updates={"trigger_rate": 1.0}, ) ) - confgen_arguments = {"SmallFootprint": conf_dict} + # The commands to run in nanorc, as a list -nanorc_command_list = ( - "boot conf start --run-number 101 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow stop-trigger-sources stop wait 2 scrap terminate".split() -) +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = ( + "boot conf start --run-number 101 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow stop-trigger-sources stop wait 2 scrap terminate".split() + ) +else: + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -114,6 +139,10 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( @@ -122,6 +151,10 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + # Run some tests on the output data file assert len(run_nanorc.data_files) == expected_number_of_data_files diff --git a/integtest/tpg_state_collection_test.py b/integtest/tpg_state_collection_test.py index 5e94b14..b4000de 100644 --- a/integtest/tpg_state_collection_test.py +++ b/integtest/tpg_state_collection_test.py @@ -7,9 
+7,15 @@ import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes import integrationtest.opmon_metric_checks as opmon_metric_checks +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. +import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 2 number_of_readout_apps = 1 @@ -102,6 +108,16 @@ ], } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(16) # 3 for each data source (incl TPG) plus 2 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(10) # double what we observe being used ('free -h') +resval.require_total_memory_gb(20) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + object_databases = ["config/daqsystemtest/integrationtest-objects.data.xml"] conf_dict = data_classes.drunc_config() @@ -185,21 +201,31 @@ confgen_arguments = {"Software_TPG_System": conf_dict} # The commands to run in nanorc, as a list -nanorc_command_list = ( - "boot conf wait 5".split() - + "start --run-number 101 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() - + "start --run-number 102 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() - + " scrap terminate".split() -) +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = ( + "boot conf wait 5".split() + + "start --run-number 101 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() + + "start --run-number 102 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() + + " scrap terminate".split() + ) +else: + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -215,6 +241,10 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( @@ -223,6 +253,10 @@ def test_log_files(run_nanorc): def 
test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance low_number_of_files = expected_number_of_data_files @@ -271,6 +305,10 @@ def test_data_files(run_nanorc): def test_tpstream_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + tpstream_files = run_nanorc.tpset_files local_expected_event_count = ( run_duration + 8 @@ -301,6 +339,10 @@ def test_tpstream_files(run_nanorc): # 26-Nov-2025, KAB: added checking of opmon metrics to verify that the ones that are # specifically enabled in this test work as expected. def test_metric_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + print("") # Clear potential dot from pytest session_name = run_nanorc.session_name if run_nanorc.session_name else run_nanorc.session diff --git a/integtest/tpreplay_test.py b/integtest/tpreplay_test.py index 0a7331e..8932566 100644 --- a/integtest/tpreplay_test.py +++ b/integtest/tpreplay_test.py @@ -36,6 +36,8 @@ import integrationtest.data_classes as data_classes import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir from daqconf.consolidate import copy_configuration from pathlib import Path @@ -47,6 +49,10 @@ def _cleanup_tmpdir(): pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. +import functools +print = functools.partial(print, flush=True) + # Run setup run_duration = 20 # seconds check_for_logfile_errors = True @@ -69,6 +75,16 @@ def _cleanup_tmpdir(): ] } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(12) # 3 for ConnSvc threads plus 3 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(4) # double what we observe being used ('free -h') +resval.require_total_memory_gb(8) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + ### Config setup # Create temp config tmpdirname = tempfile.mkdtemp() @@ -225,22 +241,47 @@ def _cleanup_tmpdir(): "np02-tpreplay": tpreplay_local_conf, "np04-tpreplay": tpreplay_np04_conf } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. 
We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. +if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } # The commands to run in nanorc, as a list -nanorc_command_list = "boot conf wait 5".split() -nanorc_command_list += ( +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = "boot conf wait 5".split() + nanorc_command_list += ( "start ".split() + "--run-number 101 enable-triggers wait ".split() + [str(run_duration)] + "disable-triggers drain-dataflow wait 2 stop-trigger-sources wait 2 stop wait 2".split() ) -nanorc_command_list += "scrap terminate".split() + nanorc_command_list += "scrap terminate".split() +else: + nanorc_command_list = ["wait", "1"] atexit.register(_cleanup_tmpdir) ### Tests # Run control -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -256,6 +297,10 @@ def test_nanorc_success(run_nanorc): # Log files def test_log_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + session_name = run_nanorc.session_name if run_nanorc.session_name is not None else run_nanorc.session log_dir = pathlib.Path("/log") @@ -286,6 +331,10 @@ def test_log_files(run_nanorc): # Data files def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + current_test = os.environ.get("PYTEST_CURRENT_TEST") datafile_params = { diff --git a/integtest/tpstream_writing_test.py b/integtest/tpstream_writing_test.py index c5b3fd0..97605f8 100644 --- a/integtest/tpstream_writing_test.py +++ b/integtest/tpstream_writing_test.py @@ -6,9 +6,15 @@ import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks import integrationtest.data_classes as data_classes +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. 
+import functools +print = functools.partial(print, flush=True) + # Values that help determine the running conditions number_of_data_producers = 2 number_of_readout_apps = 1 @@ -101,6 +107,16 @@ ], } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(16) # 3 for each data source (incl TPG) plus 2 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(10) # double what we observe being used ('free -h') +resval.require_total_memory_gb(20) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + object_databases = ["config/daqsystemtest/integrationtest-objects.data.xml"] conf_dict = data_classes.drunc_config() @@ -164,21 +180,31 @@ confgen_arguments = {"Software_TPG_System": conf_dict} # The commands to run in nanorc, as a list -nanorc_command_list = ( - "boot conf wait 5".split() - + "start --run-number 101 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() - + "start --run-number 102 wait 1 enable-triggers wait ".split() - + [str(run_duration)] - + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() - + " scrap terminate".split() -) +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = ( + "boot conf wait 5".split() + + "start --run-number 101 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() + + "start --run-number 102 wait 1 enable-triggers wait ".split() + + [str(run_duration)] + + "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop ".split() + + " scrap terminate".split() + ) +else: + nanorc_command_list = ["wait", "1"] # The tests themselves -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -194,6 +220,10 @@ def test_nanorc_success(run_nanorc): def test_log_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + if check_for_logfile_errors: # Check that there are no warnings or errors in the log files assert log_file_checks.logs_are_error_free( @@ -202,6 +232,10 @@ def test_log_files(run_nanorc): def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + local_expected_event_count = expected_event_count local_event_count_tolerance = expected_event_count_tolerance low_number_of_files = expected_number_of_data_files @@ -250,6 +284,10 @@ def test_data_files(run_nanorc): def test_tpstream_files(run_nanorc): + if not 
resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + tpstream_files = run_nanorc.tpset_files local_expected_event_count = ( run_duration + 8 diff --git a/integtest/trigger_bitwords_test.py b/integtest/trigger_bitwords_test.py index 5348360..08e0984 100644 --- a/integtest/trigger_bitwords_test.py +++ b/integtest/trigger_bitwords_test.py @@ -35,9 +35,15 @@ import integrationtest.data_classes as data_classes import integrationtest.data_file_checks as data_file_checks import integrationtest.log_file_checks as log_file_checks +import integrationtest.resource_validation as resource_validation +from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir pytest_plugins = "integrationtest.integrationtest_drunc" +# tweak the print() statement default behavior so that it always flushes the output. +import functools +print = functools.partial(print, flush=True) + # Run setup run_duration = 15 # seconds check_for_logfile_errors = True @@ -55,6 +61,16 @@ ] } +# Determine if this computer has enough resources for these tests +resval = resource_validation.ResourceValidator() +resval.require_cpu_count(30) # 3 for each data source (incl TPG) plus 3 more for everything else; overall safety factor of 2 +resval.require_free_memory_gb(14) # double what we observe being used ('free -h') +resval.require_total_memory_gb(28) # 4x what we need; trying to be kind to others +actual_output_path = get_pytest_tmpdir() +resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe +resval_debug_string = resval.get_debug_string() +print(f"{resval_debug_string}") + ### Config setup common_config_obj = data_classes.drunc_config() common_config_obj.op_env = "test" @@ -227,20 +243,45 @@ "series-bit": series_bitword_conf, "coincidence-bit": coincidence_bitword_conf } +# When the computer doesn't have enough resources, we only need to run one configuration. +# This is enough to provide feedback to the user about the lack of resources without spending +# the time to run through all of the configurations that exist in this test. +# It doesn't matter which configuration gets used because it doesn't really get executed, +# so we just pick the first one. +# The confgen_arguments key is a little important, though. We would like the pytest to still +# provide useful feedback to the user even when the "-k" option is specified, so we combine +# all of the existing keys into the new (dummy) one so that any valid "-k " +# selection will provide the desired feedback to the user about the insuffiicent resources. 
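
# For reference, the nanorc command lists assembled below are plain lists of tokens.
# Building one run's worth with the same split()-and-concatenate pattern
# (run_duration = 15 in this file) yields:

run_duration = 15  # seconds, as set earlier in this file

nanorc_command_list = "boot conf".split()
nanorc_command_list += (
    "start ".split()
    + "--run-number 101 enable-triggers wait ".split()
    + [str(run_duration)]
    + "disable-triggers drain-dataflow wait 2 stop-trigger-sources wait 2 stop wait 2".split()
)
nanorc_command_list += "scrap terminate".split()
print(nanorc_command_list)
# ['boot', 'conf', 'start', '--run-number', '101', 'enable-triggers', 'wait', '15',
#  'disable-triggers', 'drain-dataflow', 'wait', '2', 'stop-trigger-sources', 'wait', '2',
#  'stop', 'wait', '2', 'scrap', 'terminate']
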
+if not resval.this_computer_has_sufficient_resources: + all_encompassing_dummy_key = ",".join(confgen_arguments.keys()) + first_config = next(iter(confgen_arguments.values())) + confgen_arguments = { + all_encompassing_dummy_key: first_config + } # The commands to run in nanorc, as a list -nanorc_command_list = "boot conf".split() -nanorc_command_list += ( +if resval.this_computer_has_sufficient_resources: + nanorc_command_list = "boot conf".split() + nanorc_command_list += ( "start ".split() + "--run-number 101 enable-triggers wait ".split() + [str(run_duration)] + "disable-triggers drain-dataflow wait 2 stop-trigger-sources wait 2 stop wait 2".split() ) -nanorc_command_list += "scrap terminate".split() + nanorc_command_list += "scrap terminate".split() +else: + nanorc_command_list = ["wait", "1"] ### Tests # Run control -def test_nanorc_success(run_nanorc): +def test_nanorc_success(run_nanorc, capsys): + if not resval.this_computer_has_sufficient_resources: + resval_report_string = resval.get_insufficient_resources_report() + with capsys.disabled(): + print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}") + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"{resval_summary_string}") + # print the name of the current test current_test = os.environ.get("PYTEST_CURRENT_TEST") match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test) @@ -256,7 +297,9 @@ def test_nanorc_success(run_nanorc): # Log files def test_log_files(run_nanorc): - current_test = os.environ.get("PYTEST_CURRENT_TEST") + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") session_name = run_nanorc.session_name if run_nanorc.session_name is not None else run_nanorc.session @@ -288,6 +331,10 @@ def test_log_files(run_nanorc): # Data files def test_data_files(run_nanorc): + if not resval.this_computer_has_sufficient_resources: + resval_summary_string = resval.get_insufficient_resources_summary() + pytest.skip(f"\n{resval_summary_string}") + current_test = os.environ.get("PYTEST_CURRENT_TEST") # sanity checks