84 changes: 45 additions & 39 deletions integtest/3ru_1df_multirun_test.py
@@ -9,9 +9,15 @@
import integrationtest.data_file_checks as data_file_checks
import integrationtest.log_file_checks as log_file_checks
import integrationtest.data_classes as data_classes
import integrationtest.resource_validation as resource_validation
from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir

pytest_plugins = "integrationtest.integrationtest_drunc"

# Tweak the default behavior of print() so that it always flushes its output.
import functools
print = functools.partial(print, flush=True)
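# (functools.partial supplies flush=True as a default keyword argument here,
# so individual print() calls in this file can still override it if needed.)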

# Values that help determine the running conditions
number_of_data_producers = 3
number_of_readout_apps = 3
@@ -25,8 +25,6 @@
check_for_logfile_errors = True
expected_event_count = trigger_rate * run_duration
expected_event_count_tolerance = math.ceil(expected_event_count / 10)
minimum_cpu_count = 18
minimum_free_memory_gb = 24

wibeth_frag_params = {
"fragment_type_description": "WIBEth",
@@ -90,18 +94,15 @@
],
}

# Determine if this computer is powerful enough for these tests
sufficient_resources_on_this_computer = True
cpu_count = os.cpu_count()
hostname = os.uname().nodename
mem_obj = psutil.virtual_memory()
free_mem = round((mem_obj.available / (1024 * 1024 * 1024)), 2)
total_mem = round((mem_obj.total / (1024 * 1024 * 1024)), 2)
print(
f"DEBUG: CPU count is {cpu_count}, free and total memory are {free_mem} GB and {total_mem} GB."
)
if cpu_count < minimum_cpu_count or free_mem < minimum_free_memory_gb:
sufficient_resources_on_this_computer = False
# Determine if this computer has enough resources for these tests
resval = resource_validation.ResourceValidator()
resval.require_cpu_count(60) # 3 for each data source (incl TPG) plus 3 more for everything else; overall safety factor of 2
resval.require_free_memory_gb(32) # double what we observe being used ('free -h')
resval.require_total_memory_gb(64) # 4x what we need; trying to be kind to others
actual_output_path = get_pytest_tmpdir()
resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe
resval_debug_string = resval.get_debug_string()
print(f"{resval_debug_string}")

# The next three variable declarations *must* be present as globals in the test
# file. They're read by the "fixtures" in conftest.py to determine how
@@ -158,9 +159,24 @@
"WIBEth_System": conf_dict,
"Software_TPG_System": swtpg_conf,
}
# When the computer doesn't have enough resources, we only need to run one configuration.
# This is enough to provide feedback to the user about the lack of resources without spending
# the time to run through all of the configurations that exist in this test.
# It doesn't matter which configuration gets used because it doesn't really get executed,
# so we just pick the first one.
# The confgen_arguments key is a little important, though. We would like pytest to still
# provide useful feedback to the user even when the "-k" option is specified, so we combine
# all of the existing keys into the new (dummy) one so that any valid "-k <config_name>"
# selection will provide the desired feedback to the user about the insufficient resources.
if not resval.this_computer_has_sufficient_resources:
all_encompassing_dummy_key = ",".join(confgen_arguments.keys())
first_config = next(iter(confgen_arguments.values()))
confgen_arguments = {
all_encompassing_dummy_key: first_config
}
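# For example, with the dict above this becomes
# {"WIBEth_System,Software_TPG_System": conf_dict}, and because pytest's "-k"
# option matches substrings of test IDs, either "-k WIBEth_System" or
# "-k Software_TPG_System" still selects this single (dummy) parametrization.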

# The commands to run in nanorc, as a list
if sufficient_resources_on_this_computer:
if resval.this_computer_has_sufficient_resources:
nanorc_command_list = "boot conf".split()
nanorc_command_list += (
"start --run-number 101 wait 5 enable-triggers wait ".split()
@@ -179,16 +195,17 @@
)
nanorc_command_list += "scrap terminate".split()
else:
nanorc_command_list = ["boot", "terminate"]
nanorc_command_list = ["wait", "1"]

# The tests themselves


def test_nanorc_success(run_nanorc):
if not sufficient_resources_on_this_computer:
pytest.skip(
f"This computer ({hostname}) does not have enough resources to run this test."
)
def test_nanorc_success(run_nanorc, capsys):
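# (Note: capsys.disabled() temporarily suspends pytest's output capture, so the
# report below is shown to the user even without the "-s" option.)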
if not resval.this_computer_has_sufficient_resources:
resval_report_string = resval.get_insufficient_resources_report()
with capsys.disabled():
print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}")
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"{resval_summary_string}")

# print the name of the current test
current_test = os.environ.get("PYTEST_CURRENT_TEST")
@@ -205,10 +222,9 @@ def test_nanorc_success(run_nanorc):


def test_log_files(run_nanorc):
if not sufficient_resources_on_this_computer:
pytest.skip(
f"This computer ({hostname}) does not have enough resources to run this test."
)
if not resval.this_computer_has_sufficient_resources:
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"\n{resval_summary_string}")

if check_for_logfile_errors:
# Check that there are no warnings or errors in the log files
@@ -218,19 +234,9 @@ def test_log_files(run_nanorc):


def test_data_files(run_nanorc):
if not sufficient_resources_on_this_computer:
print(
f"This computer ({hostname}) does not have enough resources to run this test."
)
print(
f" (CPU count is {cpu_count}, free and total memory are {free_mem} GB and {total_mem} GB.)"
)
print(
f" (Minimum CPU count is {minimum_cpu_count} and minimum free memory is {minimum_free_memory_gb} GB.)"
)
pytest.skip(
f"This computer ({hostname}) does not have enough resources to run this test."
)
if not resval.this_computer_has_sufficient_resources:
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"\n{resval_summary_string}")

local_expected_event_count = expected_event_count
local_event_count_tolerance = expected_event_count_tolerance
86 changes: 68 additions & 18 deletions integtest/3ru_3df_multirun_test.py
@@ -7,9 +7,15 @@
import integrationtest.data_file_checks as data_file_checks
import integrationtest.log_file_checks as log_file_checks
import integrationtest.data_classes as data_classes
import integrationtest.resource_validation as resource_validation
from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir

pytest_plugins = "integrationtest.integrationtest_drunc"

# Tweak the default behavior of print() so that it always flushes its output.
import functools
print = functools.partial(print, flush=True)

# Values that help determine the running conditions
number_of_data_producers = 2
number_of_readout_apps = 3
@@ -111,6 +117,16 @@
# More information is provided about each of these below [coming soon!].
#

# Determine if this computer has enough resources for these tests
resval = resource_validation.ResourceValidator()
resval.require_cpu_count(44) # 3 for each data source (incl TPG) plus 3 more for everything else; overall safety factor of 2
resval.require_free_memory_gb(24) # double what we observe being used ('free -h')
resval.require_total_memory_gb(48) # 4x what we need; trying to be kind to others
actual_output_path = get_pytest_tmpdir()
resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe
resval_debug_string = resval.get_debug_string()
print(f"{resval_debug_string}")

# 29-Dec-2025, KAB: The following comment about three variables is out-of-date.
# It will be replaced soon, and the comment block above is a start on that.
#
@@ -165,35 +181,61 @@
"Software_TPG_System": swtpg_conf,
}

# When the computer doesn't have enough resources, we only need to run one configuration.
# This is enough to provide feedback to the user about the lack of resources without spending
# the time to run through all of the configurations that exist in this test.
# It doesn't matter which configuration gets used because it doesn't really get executed,
# so we just pick the first one.
# The confgen_arguments key is a little important, though. We would like pytest to still
# provide useful feedback to the user even when the "-k" option is specified, so we combine
# all of the existing keys into the new (dummy) one so that any valid "-k <config_name>"
# selection will provide the desired feedback to the user about the insufficient resources.
if not resval.this_computer_has_sufficient_resources:
all_encompassing_dummy_key = ",".join(confgen_arguments.keys())
first_config = next(iter(confgen_arguments.values()))
confgen_arguments = {
all_encompassing_dummy_key: first_config
}

# 29-Dec-2025, KAB: added sample process manager choices.
process_manager_choices = {
"StandAloneSSH_PM" : {"pm_type": "ssh-standalone"},
# "ParamikoClient_PM" : {"pm_type": "ssh-standalone-paramiko-client"},
}
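# (Presumably read by the fixtures in conftest.py, like the other test-file
# globals, to parametrize the tests over drunc process-manager types.)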

# The commands to run in nanorc, as a list
nanorc_command_list = "boot conf".split()
nanorc_command_list += (
"start --run-number 101 wait 5 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split()
)
nanorc_command_list += (
"start --run-number 102 wait 1 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split()
)
nanorc_command_list += (
"start --run-number 103 wait 1 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split()
)
nanorc_command_list += "scrap terminate".split()
if resval.this_computer_has_sufficient_resources:
nanorc_command_list = "boot conf".split()
nanorc_command_list += (
"start --run-number 101 wait 5 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split()
)
nanorc_command_list += (
"start --run-number 102 wait 1 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split()
)
nanorc_command_list += (
"start --run-number 103 wait 1 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 1 drain-dataflow wait 2 stop-trigger-sources wait 1 stop wait 2".split()
)
nanorc_command_list += "scrap terminate".split()
else:
nanorc_command_list = ["wait", "1"]

# The tests themselves


def test_nanorc_success(run_nanorc):
def test_nanorc_success(run_nanorc, capsys):
if not resval.this_computer_has_sufficient_resources:
resval_report_string = resval.get_insufficient_resources_report()
with capsys.disabled():
print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}")
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"{resval_summary_string}")

# print the name of the current test
current_test = os.environ.get("PYTEST_CURRENT_TEST")
match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test)
@@ -209,6 +251,10 @@ def test_nanorc_success(run_nanorc):


def test_log_files(run_nanorc):
if not resval.this_computer_has_sufficient_resources:
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"\n{resval_summary_string}")

if check_for_logfile_errors:
# Check that there are no warnings or errors in the log files
assert log_file_checks.logs_are_error_free(
@@ -217,6 +263,10 @@ def test_log_files(run_nanorc):


def test_data_files(run_nanorc):
if not resval.this_computer_has_sufficient_resources:
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"\n{resval_summary_string}")

local_expected_event_count = expected_event_count
local_event_count_tolerance = expected_event_count_tolerance
low_number_of_files = expected_number_of_data_files
64 changes: 55 additions & 9 deletions integtest/example_system_test.py
@@ -9,9 +9,15 @@
import integrationtest.data_file_checks as data_file_checks
import integrationtest.log_file_checks as log_file_checks
import integrationtest.data_classes as data_classes
import integrationtest.resource_validation as resource_validation
from integrationtest.get_pytest_tmpdir import get_pytest_tmpdir

pytest_plugins = "integrationtest.integrationtest_drunc"

# Tweak the default behavior of print() so that it always flushes its output.
import functools
print = functools.partial(print, flush=True)

# Values that help determine the running conditions
run_duration = 20 # seconds

@@ -71,6 +77,16 @@
]
}

# Determine if this computer has enough resources for these tests
resval = resource_validation.ResourceValidator()
resval.require_cpu_count(60) # 3 for each data source (incl TPG) plus 6 more for everything else; overall safety factor of 2
resval.require_free_memory_gb(24) # double what we observe being used ('free -h')
resval.require_total_memory_gb(48) # 4x what we need; trying to be kind to others
actual_output_path = get_pytest_tmpdir()
resval.require_free_disk_space_gb(actual_output_path, 1) # more than what we observe
resval_debug_string = resval.get_debug_string()
print(f"{resval_debug_string}")

# The arguments to pass to the config generator, excluding the json
# output directory (the test framework handles that)

@@ -122,18 +138,43 @@ def host_is_at_ehn1(hostname):
"Local 2x3 Conf": twobythree_local_conf,
}

# When the computer doesn't have enough resources, we only need to run one configuration.
# This is enough to provide feedback to the user about the lack of resources without spending
# the time to run through all of the configurations that exist in this test.
# It doesn't matter which configuration gets used because it doesn't really get executed,
# so we just pick the first one.
# The confgen_arguments key is a little important, though. We would like pytest to still
# provide useful feedback to the user even when the "-k" option is specified, so we combine
# all of the existing keys into the new (dummy) one so that any valid "-k <config_name>"
# selection will provide the desired feedback to the user about the insufficient resources.
if not resval.this_computer_has_sufficient_resources:
all_encompassing_dummy_key = ",".join(confgen_arguments.keys())
first_config = next(iter(confgen_arguments.values()))
confgen_arguments = {
all_encompassing_dummy_key: first_config
}

# The commands to run in nanorc, as a list
nanorc_command_list = (
"boot wait 2 conf start --run-number 101 wait 1 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop scrap terminate".split()
)
if resval.this_computer_has_sufficient_resources:
nanorc_command_list = (
"boot wait 2 conf start --run-number 101 wait 1 enable-triggers wait ".split()
+ [str(run_duration)]
+ "disable-triggers wait 2 drain-dataflow wait 2 stop-trigger-sources stop scrap terminate".split()
)
else:
nanorc_command_list = ["wait", "1"]

# The tests themselves


def test_nanorc_success(run_nanorc):
def test_nanorc_success(run_nanorc, capsys):
if not resval.this_computer_has_sufficient_resources:
resval_report_string = resval.get_insufficient_resources_report()
with capsys.disabled():
print(f"\n\N{LARGE YELLOW CIRCLE} {resval_report_string}")
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"{resval_summary_string}")

# print the name of the current test
current_test = os.environ.get("PYTEST_CURRENT_TEST")
match_obj = re.search(r".*\[(.+)-run_.*rc.*\d].*", current_test)
@@ -154,15 +195,17 @@ def test_nanorc_success(run_nanorc):


def test_log_files(run_nanorc):
current_test = os.environ.get("PYTEST_CURRENT_TEST")
if not resval.this_computer_has_sufficient_resources:
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"\n{resval_summary_string}")

current_test = os.environ.get("PYTEST_CURRENT_TEST")
if not host_is_at_ehn1(hostname) and "EHN1" in current_test:
pytest.skip(
f"This computer ({hostname}) is not at EHN1, not running EHN1 sessions"
)

session_name = run_nanorc.session_name if run_nanorc.session_name is not None else run_nanorc.session

if host_is_at_ehn1(hostname) and "EHN1" in current_test:
log_dir = pathlib.Path("/log")
run_nanorc.log_files += list(log_dir.glob(f"log_*_{session_name}*.txt"))
@@ -190,8 +233,11 @@ def test_log_files(run_nanorc):


def test_data_files(run_nanorc):
current_test = os.environ.get("PYTEST_CURRENT_TEST")
if not resval.this_computer_has_sufficient_resources:
resval_summary_string = resval.get_insufficient_resources_summary()
pytest.skip(f"\n{resval_summary_string}")

current_test = os.environ.get("PYTEST_CURRENT_TEST")
if not host_is_at_ehn1(hostname) and "EHN1" in current_test:
pytest.skip(
f"This computer ({hostname}) is not at EHN1, not running EHN1 sessions"