|
| 1 | +""" |
| 2 | +Test for the new fragment annotation arrays in PSM schema. |
| 3 | +""" |
| 4 | + |
| 5 | +import pytest |
| 6 | +from pathlib import Path |
| 7 | + |
| 8 | +try: |
| 9 | + import pyarrow as pa |
| 10 | + from quantmsio.core.format import PSM_FIELDS, PSM_UNIQUE_FIELDS |
| 11 | + from quantmsio.core.common import PSM_SCHEMA |
| 12 | + |
| 13 | + PYARROW_AVAILABLE = True |
| 14 | +except ImportError: |
| 15 | + PYARROW_AVAILABLE = False |
| 16 | + |
| 17 | + |
@pytest.mark.skipif(not PYARROW_AVAILABLE, reason="PyArrow not available")
def test_psm_schema_contains_fragment_annotation_fields():
    """Check that the fragment annotation columns are declared for PSMs.

    Each of the three column names must be present both in
    ``PSM_UNIQUE_FIELDS`` and in ``PSM_SCHEMA``.
    """
    expected_columns = ["charge_array", "ion_type_array", "ion_mobility_array"]

    # First look at the PSM-specific field list.
    unique_names = [entry.name for entry in PSM_UNIQUE_FIELDS]
    for column in expected_columns:
        assert column in unique_names, f"{column} not found in PSM_UNIQUE_FIELDS"

    # The schema object itself must exist and be non-empty.
    psm_schema = PSM_SCHEMA
    assert psm_schema is not None
    assert len(psm_schema) > 0

    # Then confirm the same columns are exposed by the full schema.
    schema_names = [entry.name for entry in psm_schema]
    for column in expected_columns:
        assert column in schema_names, f"{column} not found in PSM_SCHEMA"
| 38 | + |
| 39 | + |
@pytest.mark.skipif(not PYARROW_AVAILABLE, reason="PyArrow not available")
def test_fragment_annotation_fields_types():
    """Check the Arrow type and nullability of each fragment annotation field.

    The fields are looked up by name in ``PSM_UNIQUE_FIELDS``; each must be a
    list of the expected element type and must be nullable.
    """
    fields_by_name = {entry.name: entry for entry in PSM_UNIQUE_FIELDS}

    # (column name, expected Arrow type, message shown on a type mismatch)
    expectations = (
        (
            "charge_array",
            pa.list_(pa.int32()),
            "charge_array should be list of int32",
        ),
        (
            "ion_type_array",
            pa.list_(pa.string()),
            "ion_type_array should be list of string",
        ),
        (
            "ion_mobility_array",
            pa.list_(pa.float32()),
            "ion_mobility_array should be list of float32",
        ),
    )

    for column, expected_type, type_message in expectations:
        declared = fields_by_name[column]
        assert declared.type == expected_type, type_message
        assert declared.nullable, f"{column} should be nullable"
| 67 | + |
| 68 | + |
@pytest.mark.skipif(not PYARROW_AVAILABLE, reason="PyArrow not available")
def test_fragment_annotation_fields_have_descriptions():
    """Check that each fragment annotation field carries a usable description.

    The description is read from the ``b"description"`` key of the field
    metadata; it must be non-empty and contain the word "fragment"
    (case-insensitive).
    """
    fields_by_name = {entry.name: entry for entry in PSM_UNIQUE_FIELDS}

    for column in ("charge_array", "ion_type_array", "ion_mobility_array"):
        metadata = fields_by_name[column].metadata
        assert metadata is not None, f"{column} should have metadata"
        assert (
            b"description" in metadata
        ), f"{column} should have description in metadata"

        # Metadata values are bytes; decode before inspecting the text.
        text = metadata[b"description"].decode()
        assert len(text) > 0, f"{column} should have non-empty description"
        assert (
            "fragment" in text.lower()
        ), f"{column} description should mention 'fragment'"
| 87 | + |
| 88 | + |
@pytest.mark.skipif(not PYARROW_AVAILABLE, reason="PyArrow not available")
def test_create_table_with_fragment_annotations():
    """Test creating a PyArrow table with the new fragment annotation fields.

    Builds a one-row table against a minimal, locally defined schema (not
    ``PSM_SCHEMA``) and verifies that the three fragment annotation columns
    round-trip through ``pa.table``.
    """

    # Create sample data with all required fields including the new ones
    sample_data = {
        # Basic required fields
        "sequence": ["PEPTIDE"],
        "peptidoform": ["PEPTIDE"],
        "precursor_charge": [2],
        "posterior_error_probability": [0.01],
        "is_decoy": [0],
        "calculated_mz": [450.2],
        "observed_mz": [450.21],
        "mp_accessions": [["P12345"]],
        "predicted_rt": [None],
        "reference_file_name": ["sample1"],
        "scan": ["1000"],
        "rt": [120.5],
        "ion_mobility": [None],
        # Array fields
        "number_peaks": [3],
        "mz_array": [[100.1, 200.2, 300.3]],
        "intensity_array": [[1000.0, 2000.0, 3000.0]],
        # New fragment annotation fields
        "charge_array": [[1, 1, 2]],
        "ion_type_array": [["b", "y", "b"]],
        "ion_mobility_array": [[0.8, 0.9, 1.0]],
    }

    # Create a minimal schema for testing
    test_fields = [
        pa.field("sequence", pa.string()),
        pa.field("peptidoform", pa.string()),
        pa.field("precursor_charge", pa.int32()),
        pa.field("posterior_error_probability", pa.float32()),
        pa.field("is_decoy", pa.int32()),
        pa.field("calculated_mz", pa.float32()),
        pa.field("observed_mz", pa.float32()),
        pa.field("mp_accessions", pa.list_(pa.string())),
        pa.field("predicted_rt", pa.float32()),
        pa.field("reference_file_name", pa.string()),
        pa.field("scan", pa.string()),
        pa.field("rt", pa.float32()),
        pa.field("ion_mobility", pa.float32()),
        pa.field("number_peaks", pa.int32()),
        pa.field("mz_array", pa.list_(pa.float32())),
        pa.field("intensity_array", pa.list_(pa.float32())),
        pa.field("charge_array", pa.list_(pa.int32())),
        pa.field("ion_type_array", pa.list_(pa.string())),
        pa.field("ion_mobility_array", pa.list_(pa.float32())),
    ]

    test_schema = pa.schema(test_fields)

    # Create table - this should not raise an exception
    table = pa.table(sample_data, schema=test_schema)

    # Verify table structure
    assert table.num_rows == 1
    assert table.num_columns == len(test_fields)

    # Integer and string columns round-trip exactly.
    assert table.column("charge_array").to_pylist() == [[1, 1, 2]]
    assert table.column("ion_type_array").to_pylist() == [["b", "y", "b"]]

    # ion_mobility_array is stored as float32, so 0.8 and 0.9 come back as the
    # nearest float32 values (e.g. 0.800000011920929) rather than the Python
    # double literals. Exact equality would fail, so compare with a tolerance.
    # pytest.approx does not handle nested sequences, hence the per-row check.
    mobility_rows = table.column("ion_mobility_array").to_pylist()
    assert len(mobility_rows) == 1
    assert mobility_rows[0] == pytest.approx([0.8, 0.9, 1.0])
| 155 | + |
| 156 | + |
@pytest.mark.skipif(not PYARROW_AVAILABLE, reason="PyArrow not available")
def test_nullable_fragment_annotation_fields():
    """Check that a table accepts null for every fragment annotation column.

    A single row is built against a small local schema in which the three
    fragment annotation columns hold ``None``; the nulls must survive the
    round trip through ``pa.table``.
    """
    # One entry per column: (name, Arrow type, single-row values).
    column_specs = [
        ("sequence", pa.string(), ["PEPTIDE"]),
        ("peptidoform", pa.string(), ["PEPTIDE"]),
        ("precursor_charge", pa.int32(), [2]),
        ("mz_array", pa.list_(pa.float32()), [[100.1, 200.2, 300.3]]),
        ("intensity_array", pa.list_(pa.float32()), [[1000.0, 2000.0, 3000.0]]),
        ("number_peaks", pa.int32(), [3]),
        # Fragment annotation columns are left null on purpose.
        ("charge_array", pa.list_(pa.int32()), [None]),
        ("ion_type_array", pa.list_(pa.string()), [None]),
        ("ion_mobility_array", pa.list_(pa.float32()), [None]),
    ]

    null_schema = pa.schema(
        [pa.field(name, arrow_type) for name, arrow_type, _ in column_specs]
    )
    row_data = {name: values for name, _, values in column_specs}

    # Building the table must succeed even though three columns are null.
    table = pa.table(row_data, schema=null_schema)

    for column in ("charge_array", "ion_type_array", "ion_mobility_array"):
        assert table.column(column).to_pylist() == [None]
0 commit comments