diff --git a/alphaquant/cluster/cluster_missingval.py b/alphaquant/cluster/cluster_missingval.py index f1d2763a..e1b7db50 100644 --- a/alphaquant/cluster/cluster_missingval.py +++ b/alphaquant/cluster/cluster_missingval.py @@ -8,6 +8,60 @@ PVALUE_THRESHOLD_FOR_INTENSITY_BASED_COUNTING = 0.1 +# Determines at which level missing value testing is performed. +# Set once based on tree structure, then reused. +MISSINGVAL_TEST_LEVEL = None + + +def determine_missingval_test_level(root_node): + """Determine the appropriate level for missing value statistical testing. + + Inspects the tree structure rooted at *root_node* and sets the module-level + global ``MISSINGVAL_TEST_LEVEL`` to one of: + + * ``"mod_seq_charge"`` -- fragment-level data where mod_seq_charge nodes exist. + * ``"base"`` -- all other hierarchies (precursor-only, peptide-only, gene-only). + + Scenarios: + 1) ``mod_seq_charge`` nodes exist in the tree -> test at ``mod_seq_charge`` level. + 2) Leaf parent type is ``mod_seq`` -> test at ``base`` ion level. + 3) Leaf parent type is ``seq`` -> test at ``base`` ion level. + 4) Leaf parent type is ``gene`` -> test at ``base`` ion level. + + Args: + root_node (anytree.Node): Root of a protein tree. Must have at least + one leaf with a parent. + + Raises: + ValueError: If the leaf parent type does not match any expected pattern. + + Side effects: + Sets the module-level global ``MISSINGVAL_TEST_LEVEL``. + """ + global MISSINGVAL_TEST_LEVEL + + # Check if mod_seq_charge nodes exist (fragment-level data) + mod_seq_charge_nodes = anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") + if len(mod_seq_charge_nodes) > 0: + MISSINGVAL_TEST_LEVEL = "mod_seq_charge" + return + + # For all other cases, check what's one level above leaves + leaf_parent_type = root_node.leaves[0].parent.type + + if leaf_parent_type == "mod_seq": + # Scenario 2: charged peptides without fragments + MISSINGVAL_TEST_LEVEL = "base" + elif leaf_parent_type == "seq": + # Scenario 3: peptides without charge info + MISSINGVAL_TEST_LEVEL = "base" + elif leaf_parent_type == "gene": + # Scenario 4: simplest hierarchy, leaves directly under gene + MISSINGVAL_TEST_LEVEL = "base" + else: + raise ValueError(f"Unexpected tree structure: leaf parent type is '{leaf_parent_type}'. " + f"Expected one of: 'mod_seq', 'seq', 'gene', or tree with 'mod_seq_charge' nodes.") + def create_protnode_from_missingval_ions(gene_name,diffions, normed_c1, normed_c2): return MissingValProtNodeCreator(gene_name, diffions, normed_c1, normed_c2).prot_node @@ -76,11 +130,21 @@ def _assign_properties_to_missingval_base_ions(self, root_node): @staticmethod - def _get_nodes_to_test(root_node): #get the nodes in the lowest level that is relevant for the binomial test - if root_node.leaves[0].parent.type == "mod_seq": #when AlphaQuant works with precursors only (not fragments), the precursors themselves are the "base ions" and the "mod_seq_charge" node does not exist - return root_node.children - else: + def _get_nodes_to_test(root_node): + """Get the nodes at which to perform the missing value statistical test. + + Uses MISSINGVAL_TEST_LEVEL which is set once based on tree structure. + """ + global MISSINGVAL_TEST_LEVEL + + # Set the test level if not already determined + if MISSINGVAL_TEST_LEVEL is None: + determine_missingval_test_level(root_node) + + if MISSINGVAL_TEST_LEVEL == "mod_seq_charge": return anytree.search.findall(root_node, filter_=lambda node: node.type == "mod_seq_charge") + else: # "base" + return root_node.leaves def _propagate_properties_to_nodes_to_test(self,nodes_to_test): #goes through each node to test and merges the properties from it's base to the node itself @@ -134,7 +198,9 @@ def _aggregate_node_properties_missingval(self, node): node.c1_has_values = any(child.c1_has_values for child in childs) node.c2_has_values = any(child.c2_has_values for child in childs) if hasattr(childs[0], "z_val"): - node.z_val = aq_cluster_utils.sum_and_re_scale_zvalues([child.z_val for child in childs]) + node.z_val = aq_cluster_utils.sum_and_re_scale_zvalues( + [child.z_val for child in childs] + ) node.p_val = aq_cluster_utils.transform_znormed_to_pval(node.z_val) diff --git a/alphaquant/diffquant/background_distributions.py b/alphaquant/diffquant/background_distributions.py index b02a7ec1..b7146e9a 100644 --- a/alphaquant/diffquant/background_distributions.py +++ b/alphaquant/diffquant/background_distributions.py @@ -11,50 +11,160 @@ from numba import njit from statistics import NormalDist import alphaquant.diffquant.diffutils as aqdiffutils - +import alphaquant.config.variables as aqvariables class ConditionBackgrounds(): + """Orchestrates background distribution calculation. + + For single-pool mode, delegates to one ``_BackgroundCalculation``. + When ``split_by_ion_type=True`` and the config defines both fragment ions + and MS1 isotopes, runs a separate calculation per ion type and combines + the resulting per-ion dicts. + """ + + def __init__(self, normed_condition_df, p2z, ion2varscore=None, split_by_ion_type=False): + """Initialise condition backgrounds. - def __init__(self, normed_condition_df, p2z): - self.backgrounds = [] + Args: + normed_condition_df (pd.DataFrame): Normalised intensity matrix + (ions x samples) for one condition. + p2z (dict): Pre-computed p-value to z-value lookup. + ion2varscore (dict | None): Optional per-ion variance-predictor + scores used for sorting ions before background partitioning. + When None, ions are sorted by median intensity. + split_by_ion_type (bool): If True **and** the current config + defines both fragment ions and MS1 isotopes, build separate + background pools for each ion type. Defaults to False. + """ self.ion2background = {} self.ion2nonNanvals = {} self.ion2allvals = {} - self.idx2ion = {} - self.init_ion2nonNanvals(normed_condition_df) - self.context_ranges = [] - self.select_intensity_ranges(p2z) + + if split_by_ion_type and self._has_multiple_ion_types(): + self._build_split(normed_condition_df, p2z, ion2varscore) + else: + self._build_single(normed_condition_df, p2z, ion2varscore) self.all_intensities = np.concatenate(list(self.ion2nonNanvals.values())) self.num_replicates = len(next(iter(self.ion2allvals.values()))) + def _build_single(self, normed_condition_df, p2z, ion2varscore): + """Build backgrounds from a single pool of all ions.""" + calc = _BackgroundCalculation(normed_condition_df, p2z, ion2varscore) + self._update_backgrounds(calc) + + def _build_split(self, normed_condition_df, p2z, ion2varscore): + """Run a separate calculation for fragment ions and MS1 isotopes. + + Falls back to single-pool when either subset has fewer than 10 ions. + """ + ion_type_group = self._split_by_ion_type(normed_condition_df.index) + if ion_type_group["FRGION"].sum() < 10 or ion_type_group["MS1ISOTOPES"].sum() < 10: + self._build_single(normed_condition_df, p2z, ion2varscore) + return + + for marker, mask in ion_type_group.items(): + sub_df = normed_condition_df.loc[mask].copy() + LOGGER.info(f"Building background for ion type '{marker}' ({len(sub_df)} ions)") + calc = _BackgroundCalculation(sub_df, p2z, ion2varscore=ion2varscore) + self._update_backgrounds(calc) + + def _update_backgrounds(self, calc): + """Merge results from a ``_BackgroundCalculation`` into this instance.""" + self.ion2background.update(calc.ion2background) + self.ion2nonNanvals.update(calc.ion2nonNanvals) + self.ion2allvals.update(calc.ion2allvals) + + @staticmethod + def _has_multiple_ion_types(): + """Return True if the config defines both fragment ions and MS1 isotopes.""" + if aqvariables.CONFIG_DICT is None: + return False + ion_hierarchy = aqvariables.CONFIG_DICT.get("ion_hierarchy", {}) + return "fragion" in ion_hierarchy and "ms1iso" in ion_hierarchy + + @staticmethod + def _split_by_ion_type(index): + """Split an index into fragment-ion and MS1-isotope subsets.""" + index_str = np.array([str(x) for x in index]) + frgion_mask = np.array(["FRGION" in s for s in index_str]) + ms1_mask = np.array(["MS1ISOTOPES" in s for s in index_str]) + return {"FRGION": frgion_mask, "MS1ISOTOPES": ms1_mask} + + +class _BackgroundCalculation(): + """Computes background distributions for a single pool of ions. + + Sorts the ions (by variance-predictor score or median intensity), + partitions them into overlapping intensity ranges, and creates a + BackGroundDistribution for each range. + + After construction the following dicts are populated: + ``ion2background``, ``ion2nonNanvals``, ``ion2allvals``. + """ + + def __init__(self, normed_condition_df, p2z, ion2varscore=None): + self.ion2background = {} + self.ion2nonNanvals = {} + self.ion2allvals = {} + + self._sort_and_index(normed_condition_df, ion2varscore) + self._create_background_distributions(p2z) + def _sort_and_index(self, normed_condition_df, ion2varscore): + """Sort ions and build index-to-ion mappings. - def init_ion2nonNanvals(self, normed_condition_df): - normed_condition_df['median'] = normed_condition_df.median(numeric_only=True, axis=1) - normed_condition_df = normed_condition_df.sort_values(by='median').drop('median', axis=1) - self.normed_condition_df = normed_condition_df - #nonan_array = get_nonna_array(normed_condition_df.to_numpy()) - #self.ion2nonNanvals = dict(zip(normed_condition_df.index, nonan_array)) + When *ion2varscore* is provided, ions are sorted by their + variance-predictor score (rank-based). Otherwise, falls back to + sorting by row-wise median intensity. + + After sorting, populates ``ion2nonNanvals``, ``ion2allvals``, and + the private ``_idx2ion`` mapping. + + Args: + normed_condition_df (pd.DataFrame): Normalised intensity matrix + (ions x samples). + ion2varscore (dict | None): Mapping from ion id to a combined + variance-predictor score, or None for median-intensity sorting. + """ + if ion2varscore is not None: + sort_scores = normed_condition_df.index.map( + lambda x: ion2varscore.get(x, 0.5) + ) + normed_condition_df = normed_condition_df.assign( + _sort_score=sort_scores + ).sort_values(by='_sort_score').drop('_sort_score', axis=1) + else: + normed_condition_df = normed_condition_df.assign( + median=normed_condition_df.median(numeric_only=True, axis=1) + ).sort_values(by='median').drop('median', axis=1) + self._normed_condition_df = normed_condition_df self.ion2nonNanvals = aqutils.get_non_nas_from_pd_df(normed_condition_df) self.ion2allvals = aqutils.get_ionints_from_pd_df(normed_condition_df) - self.idx2ion = dict(zip(range(len(normed_condition_df.index)), normed_condition_df.index)) + self._idx2ion = dict(zip(range(len(normed_condition_df.index)), normed_condition_df.index)) + def _create_background_distributions(self, p2z): + """Partition sorted ions into overlapping intensity ranges and build backgrounds. - def select_intensity_ranges(self, p2z): + Creates ``BackGroundDistribution`` objects for overlapping windows + of ions and assigns every ion to one of these distributions via + ``self.ion2background``. + + Args: + p2z (dict): Pre-computed p-value to z-value lookup (passed through + to ``BackGroundDistribution``). + """ total_available_comparisons =0 - num_contexts = 10 - cumulative_counts = np.zeros(self.normed_condition_df.shape[0]) + num_contexts = aqvariables.NUM_BG_CONTEXTS + cumulative_counts = np.zeros(self._normed_condition_df.shape[0]) - for idx ,count in enumerate(self.normed_condition_df.count(axis=1)): + for idx ,count in enumerate(self._normed_condition_df.count(axis=1)): total_available_comparisons+=count-1 cumulative_counts[idx] = int(total_available_comparisons/2) - - #assign the context sizes context_size = np.max([1000, int(total_available_comparisons/(1+num_contexts/2))]) if context_size> total_available_comparisons: context_size = int(total_available_comparisons/2) @@ -64,15 +174,13 @@ def select_intensity_ranges(self, p2z): middle_idx = int(np.searchsorted(cumulative_counts, halfcontext_size)) end_idx = int(np.searchsorted(cumulative_counts, context_size)) - context_boundaries[0] = 0 context_boundaries[1] = middle_idx context_boundaries[2] = end_idx while context_boundaries[1] < len(cumulative_counts): - bgdist = BackGroundDistribution(context_boundaries[0], context_boundaries[2], self.ion2nonNanvals, self.idx2ion, p2z) - self.context_ranges.append([context_boundaries[0], context_boundaries[2]]) - self.assign_ions2bgdists(context_boundaries[0], context_boundaries[2], bgdist) - self.backgrounds.append(bgdist) + bgdist = BackGroundDistribution(context_boundaries[0], context_boundaries[2], self.ion2nonNanvals, self._idx2ion, p2z) + for idx in range(context_boundaries[0], context_boundaries[2]): + self.ion2background[self._idx2ion[idx]] = bgdist context_boundaries[0] = context_boundaries[1] context_boundaries[1] = context_boundaries[2] end_idx = np.searchsorted(cumulative_counts, context_size + cumulative_counts[context_boundaries[0]]) @@ -80,12 +188,6 @@ def select_intensity_ranges(self, p2z): end_idx = len(cumulative_counts) context_boundaries[2] = end_idx - def assign_ions2bgdists(self, boundaries1, boundaries2, bgdist): - ion2bg_local = {} #dict(map(lambda _idx : (self.normed_condition_df.index.values[_idx], bgdist), range(boundaries1, boundaries2))) - for idx in range(boundaries1, boundaries2): - ion2bg_local.update({self.idx2ion.get(idx) : bgdist}) - self.ion2background.update(ion2bg_local) - # Cell import numpy as np import random @@ -451,10 +553,6 @@ def get_doublediff_bg(deed_ion1, deed_ion2, deedpair2doublediffdist, p2z): return subtr_bg -def invert_deedkey(deedkey): - return (deedkey[1], deedkey[0]) - - # Cell from numba import njit @@ -509,12 +607,3 @@ def transform_cumulative_into_fc2count(cumulative, min_fc): fcs, counts = _transform_cumulative_vectorized(cumulative, min_fc) return dict(zip(fcs, counts)) -# Cell -@njit -def get_cumul_from_freq(freq): - res = np.zeros(len(freq), dtype=np.int64) - res[0] = freq[0] - for i in range(1,len(freq)): - res[i] = res[i-1] + freq[i] - - return res diff --git a/tests/unit_tests/test_background_distributions.py b/tests/unit_tests/test_background_distributions.py index 743fab95..c77fe48b 100644 --- a/tests/unit_tests/test_background_distributions.py +++ b/tests/unit_tests/test_background_distributions.py @@ -27,8 +27,16 @@ def fixed_input(sample2cond_df): def background_distributions(fixed_input): """Create background distributions for testing caching""" condbg = aq_diff_bg.ConditionBackgrounds(fixed_input, {}) - # Get a few different background distributions for testing - bg_list = list(condbg.backgrounds[:5]) # Get first 5 backgrounds + # Collect unique BackGroundDistribution objects from ion2background + seen = set() + bg_list = [] + for bg in condbg.ion2background.values(): + bg_id = id(bg) + if bg_id not in seen: + seen.add(bg_id) + bg_list.append(bg) + if len(bg_list) >= 5: + break return bg_list def test_condition_backgrounds(fixed_input): @@ -195,12 +203,10 @@ def test_cache_key_uniqueness_across_different_distributions(self, fixed_input): condbg1 = aq_diff_bg.ConditionBackgrounds(fixed_input, {}) condbg2 = aq_diff_bg.ConditionBackgrounds(fixed_input, {}) # Different instance with same data - # Get some backgrounds from each - bg1_from_condbg1 = condbg1.backgrounds[0] - bg1_from_condbg2 = condbg2.backgrounds[0] + # Get one background from each + bg1_from_condbg1 = next(iter(condbg1.ion2background.values())) + bg1_from_condbg2 = next(iter(condbg2.ion2background.values())) - # Even though they're created from the same data, they should have different keys - # (since they represent different object instances) key1 = bg1_from_condbg1.get_cache_key() key2 = bg1_from_condbg2.get_cache_key() @@ -231,3 +237,97 @@ def test_cache_key_efficiency_and_reliability(self, background_distributions): for bg_key in cache_key: assert isinstance(bg_key, tuple), "Each background key should be a tuple" assert len(bg_key) == 6, "Each background key should have 6 elements" + + +# --------------------------------------------------------------------------- +# Tests for _split_by_ion_type, _has_multiple_ion_types, and split background building +# (new on add_summarization_approach branch) +# --------------------------------------------------------------------------- +import alphaquant.config.variables as aqvariables + + +class TestSplitByIonType: + def test_correct_masks(self): + index = pd.Index([ + "PEP1_FRGION_y3", + "PEP1_FRGION_y4", + "PEP2_MS1ISOTOPES_0", + "PEP2_MS1ISOTOPES_1", + ]) + result = aq_diff_bg.ConditionBackgrounds._split_by_ion_type(index) + assert list(result["FRGION"]) == [True, True, False, False] + assert list(result["MS1ISOTOPES"]) == [False, False, True, True] + + def test_all_frgion(self): + index = pd.Index(["A_FRGION_y1", "B_FRGION_b2"]) + result = aq_diff_bg.ConditionBackgrounds._split_by_ion_type(index) + assert result["FRGION"].all() + assert not result["MS1ISOTOPES"].any() + + def test_empty_index(self): + index = pd.Index([]) + result = aq_diff_bg.ConditionBackgrounds._split_by_ion_type(index) + assert len(result["FRGION"]) == 0 + assert len(result["MS1ISOTOPES"]) == 0 + + +class TestHasMultipleIonTypes: + def setup_method(self): + self._original_config = aqvariables.CONFIG_DICT + + def teardown_method(self): + aqvariables.CONFIG_DICT = self._original_config + + def test_true_when_both_present(self): + aqvariables.CONFIG_DICT = { + "ion_hierarchy": {"fragion": {}, "ms1iso": {}} + } + assert aq_diff_bg.ConditionBackgrounds._has_multiple_ion_types() is True + + def test_false_when_only_fragion(self): + aqvariables.CONFIG_DICT = { + "ion_hierarchy": {"fragion": {}} + } + assert aq_diff_bg.ConditionBackgrounds._has_multiple_ion_types() is False + + def test_false_when_config_is_none(self): + aqvariables.CONFIG_DICT = None + assert aq_diff_bg.ConditionBackgrounds._has_multiple_ion_types() is False + + +class TestConditionBackgroundsSplit: + """Integration-level tests: verify split vs. single-pool mode.""" + + def _make_mixed_df(self, n_frg=50, n_ms1=50, n_samples=4): + rng = np.random.RandomState(42) + frg_names = [f"PEP{i}_FRGION_y{j}" for i in range(n_frg) for j in [1]] + ms1_names = [f"PEP{i}_MS1ISOTOPES_{j}" for i in range(n_ms1) for j in [0]] + all_names = frg_names + ms1_names + data = 10 + rng.randn(len(all_names), n_samples) + cols = [f"S{i}" for i in range(n_samples)] + return pd.DataFrame(data, index=all_names, columns=cols) + + def setup_method(self): + self._original_config = aqvariables.CONFIG_DICT + + def teardown_method(self): + aqvariables.CONFIG_DICT = self._original_config + + def test_split_mode_assigns_all_ions(self): + aqvariables.CONFIG_DICT = { + "ion_hierarchy": {"fragion": {}, "ms1iso": {}} + } + df = self._make_mixed_df() + cb = aq_diff_bg.ConditionBackgrounds(df, {}, split_by_ion_type=True) + assert set(cb.ion2background.keys()) == set(df.index) + + def test_single_pool_mode_assigns_all_ions(self): + df = self._make_mixed_df() + cb = aq_diff_bg.ConditionBackgrounds(df, {}, split_by_ion_type=False) + assert set(cb.ion2background.keys()) == set(df.index) + + def test_split_false_when_config_missing(self): + aqvariables.CONFIG_DICT = None + df = self._make_mixed_df() + cb = aq_diff_bg.ConditionBackgrounds(df, {}, split_by_ion_type=True) + assert set(cb.ion2background.keys()) == set(df.index) diff --git a/tests/unit_tests/test_cluster_missingval.py b/tests/unit_tests/test_cluster_missingval.py new file mode 100644 index 00000000..50622f4d --- /dev/null +++ b/tests/unit_tests/test_cluster_missingval.py @@ -0,0 +1,110 @@ +import anytree +import pytest + +import alphaquant.cluster.cluster_missingval as aq_missingval + + +def _reset_global(): + """Reset the module-level test-level global before each test.""" + aq_missingval.MISSINGVAL_TEST_LEVEL = None + + +# --------------------------------------------------------------------------- +# Helper: build minimal tree structures for each scenario +# --------------------------------------------------------------------------- + +def _tree_with_mod_seq_charge(): + """gene -> seq -> mod_seq -> mod_seq_charge -> frgion -> base""" + root = anytree.Node("gene1", type="gene") + seq = anytree.Node("seq1", parent=root, type="seq") + mod = anytree.Node("mod1", parent=seq, type="mod_seq") + msc = anytree.Node("msc1", parent=mod, type="mod_seq_charge") + frg = anytree.Node("frg1", parent=msc, type="frgion") + anytree.Node("base1", parent=frg, type="base") + anytree.Node("base2", parent=frg, type="base") + return root + + +def _tree_mod_seq_above_leaves(): + """gene -> seq -> mod_seq -> base (no mod_seq_charge)""" + root = anytree.Node("gene1", type="gene") + seq = anytree.Node("seq1", parent=root, type="seq") + mod = anytree.Node("mod1", parent=seq, type="mod_seq") + anytree.Node("base1", parent=mod, type="base") + anytree.Node("base2", parent=mod, type="base") + return root + + +def _tree_seq_above_leaves(): + """gene -> seq -> base (precursor-only, no mod_seq)""" + root = anytree.Node("gene1", type="gene") + seq = anytree.Node("seq1", parent=root, type="seq") + anytree.Node("base1", parent=seq, type="base") + anytree.Node("base2", parent=seq, type="base") + return root + + +def _tree_gene_above_leaves(): + """gene -> base (simplest hierarchy)""" + root = anytree.Node("gene1", type="gene") + anytree.Node("base1", parent=root, type="base") + anytree.Node("base2", parent=root, type="base") + return root + + +# --------------------------------------------------------------------------- +# Tests for determine_missingval_test_level +# --------------------------------------------------------------------------- + +class TestDetermineMissingvalTestLevel: + def setup_method(self): + _reset_global() + + def test_mod_seq_charge_tree(self): + root = _tree_with_mod_seq_charge() + aq_missingval.determine_missingval_test_level(root) + assert aq_missingval.MISSINGVAL_TEST_LEVEL == "mod_seq_charge" + + def test_mod_seq_above_leaves(self): + root = _tree_mod_seq_above_leaves() + aq_missingval.determine_missingval_test_level(root) + assert aq_missingval.MISSINGVAL_TEST_LEVEL == "base" + + def test_seq_above_leaves(self): + root = _tree_seq_above_leaves() + aq_missingval.determine_missingval_test_level(root) + assert aq_missingval.MISSINGVAL_TEST_LEVEL == "base" + + def test_gene_above_leaves(self): + root = _tree_gene_above_leaves() + aq_missingval.determine_missingval_test_level(root) + assert aq_missingval.MISSINGVAL_TEST_LEVEL == "base" + + def test_unexpected_structure_raises(self): + root = anytree.Node("root", type="unknown_top") + anytree.Node("leaf", parent=root, type="base") + with pytest.raises(ValueError, match="Unexpected tree structure"): + aq_missingval.determine_missingval_test_level(root) + + +class TestGetNodesToTest: + """Tests for MissingValProtNodeCreator._get_nodes_to_test.""" + + def setup_method(self): + _reset_global() + + def test_returns_mod_seq_charge_nodes_when_present(self): + root = _tree_with_mod_seq_charge() + nodes = aq_missingval.MissingValProtNodeCreator._get_nodes_to_test(root) + assert all(n.type == "mod_seq_charge" for n in nodes) + + def test_returns_leaves_for_mod_seq_tree(self): + root = _tree_mod_seq_above_leaves() + nodes = aq_missingval.MissingValProtNodeCreator._get_nodes_to_test(root) + assert all(n.type == "base" for n in nodes) + assert set(n.name for n in nodes) == {"base1", "base2"} + + def test_returns_leaves_for_gene_tree(self): + root = _tree_gene_above_leaves() + nodes = aq_missingval.MissingValProtNodeCreator._get_nodes_to_test(root) + assert all(n.type == "base" for n in nodes)