From f59bfe252a7f0d0a6eabf25d6c66a159fac18eb9 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 22:44:38 +0100 Subject: [PATCH 01/44] Track lib/ directory: --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0be50aa..8515e0c 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,6 @@ dist/ downloads/ eggs/ .eggs/ -lib/ lib64/ parts/ sdist/ From f4f625a89bd2e0a81467f344a819706324f2e6ad Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 22:45:25 +0100 Subject: [PATCH 02/44] [Refactor] Split lib.py into 3 modules in 1 subpackage --- src/lmd/lib.py | 1406 ---------------------------------- src/lmd/lib/__init__.py | 4 + src/lmd/lib/_geom.py | 658 ++++++++++++++++ src/lmd/lib/_segmentation.py | 595 ++++++++++++++ src/lmd/lib/_utils.py | 166 ++++ 5 files changed, 1423 insertions(+), 1406 deletions(-) delete mode 100755 src/lmd/lib.py create mode 100644 src/lmd/lib/__init__.py create mode 100644 src/lmd/lib/_geom.py create mode 100644 src/lmd/lib/_segmentation.py create mode 100644 src/lmd/lib/_utils.py diff --git a/src/lmd/lib.py b/src/lmd/lib.py deleted file mode 100755 index fd32adb..0000000 --- a/src/lmd/lib.py +++ /dev/null @@ -1,1406 +0,0 @@ -from __future__ import annotations - -import csv -import gc -import multiprocessing as mp -import os -import platform -import re -import sys - -# import warnings -import warnings -from collections.abc import Iterable -from concurrent.futures import ProcessPoolExecutor, as_completed -from functools import partial, reduce -from pathlib import Path -from typing import Any, Callable, Optional, Union - -import geopandas as gpd -import matplotlib.pyplot as plt -import matplotlib.ticker as ticker -import networkx as nx -import numpy as np -import pandas as pd -import scipy -import shapely -from lxml import etree as ET -from matplotlib import image - -# import warnings -from rdp import rdp -from scipy import ndimage -from scipy.signal import convolve2d -from scipy.spatial import cKDTree -from skimage import color, data -from skimage.morphology import binary_erosion, disk -from skimage.morphology import dilation as binary_dilation -from skimage.segmentation import find_boundaries -from svgelements import SVG -from tqdm.auto import tqdm - -from lmd.segmentation import ( - _create_coord_index, - _create_coord_index_sparse, - _filter_coord_index, - calc_len, - get_coordinate_form, - tsp_greedy_solve, - tsp_hilbert_solve, -) - - -# TODO: Rename tqdm_args to tqdm_kwargs -def _execute_indexed_parallel(func: Callable, *, args: list, tqdm_args: dict = None, n_threads: int = 10) -> list: - """parallelization of function call with indexed arguments using ThreadPoolExecutor. Returns a list of results in the order of the input arguments. - - Args: - func (Callable): _description_ - args (list): _description_ - tqdm_args (dict, optional): _description_. Defaults to None. - n_threads (int, optional): _description_. Defaults to 10. - - Returns: - list: containing the results of the function calls in the same order as the input arguments - """ - if tqdm_args is None: - tqdm_args = {"total": len(args)} - elif "total" not in tqdm_args: - tqdm_args["total"] = len(args) - - results = [None for _ in range(len(args))] - with ProcessPoolExecutor(n_threads) as executor: - with tqdm(**tqdm_args) as pbar: - futures = {executor.submit(func, *arg): i for i, arg in enumerate(args)} - for future in as_completed(futures): - index = futures[future] - results[index] = future.result() - pbar.update(1) - - return results - - -class Collection: - """Class which is used for creating shape collections for the Leica LMD6 & 7. Contains a coordinate system defined by calibration points and a collection of various shapes. - - Args: - calibration_points: Calibration coordinates in the form of :math:`(3, 2)`. - orientation_transform: defines transformations performed on the provided coordinate system prior to export as XML. Defaults to the identity matrix. - - Attributes: - shapes (List[Shape]): Contains all shapes which are part of the collection. - calibration_points (Optional[np.ndarray]): Calibration coordinates in the form of :math:`(3, 2)`. - orientation_transform (np.ndarray): defines transformations performed on the provided coordinate system prior to export as XML. This orientation_transform is always applied to shapes when there is no individual orientation_transform provided. - """ - - def __init__( - self, - calibration_points: np.ndarray | None = None, - orientation_transform: np.ndarray | None = None, - scale: float = 100, - ): - self.shapes: list[Shape] = [] - - self.calibration_points: np.ndarray | None = calibration_points - - if orientation_transform is None: - orientation_transform = np.eye(2) # assign default value - - self.orientation_transform: np.ndarray = orientation_transform - - self.scale: float = scale - - self.global_coordinates = 1 - - def stats(self): - """Print statistics about the Collection in the form of: - - .. code-block:: - - ===== Collection Stats ===== - Number of shapes: 208 - Number of vertices: 126,812 - ============================ - Mean vertices: 609.67 - Min vertices: 220.00 - 5% percentile vertices: 380.20 - Median vertices: 594.00 - 95% percentile vertices: 893.20 - Max vertices: 1,300.00 - - """ - lengths = np.array([len(shape.points) for shape in self.shapes]) - - num_shapes = len(self.shapes) - num_vertices = np.sum(lengths) - - median_dp = np.median(lengths).astype(float) - mean_dp = np.mean(lengths).astype(float) - max_dp = np.max(lengths).astype(float) - min_dp = np.min(lengths).astype(float) - percentile_5 = np.percentile(lengths, 5).astype(float) - percentile_95 = np.percentile(lengths, 95).astype(float) - - print("===== Collection Stats =====") - print(f"Number of shapes: {num_shapes:,}") - print(f"Number of vertices: {num_vertices:,}") - print("============================") - print(f"Mean vertices: {mean_dp:,.0f}") - print(f"Min vertices: {min_dp:,.0f}") - print(f"5% percentile vertices: {percentile_5:,.0f}") - print(f"Median vertices: {median_dp:,.0f}") - print(f"95% percentile vertices: {percentile_95:,.0f}") - print(f"Max vertices: {max_dp:,.0f}") - - def plot( - self, - calibration: bool = True, - mode: str = "line", - fig_size: tuple = (5, 5), - apply_orientation_transform: bool = True, - apply_scale: bool = False, - save_name: str | None = None, - return_fig: bool = False, - **kwargs, - ): - """This function can be used to plot all shapes of the corresponding shape collection. - - Args: - calibration: Controls wether the calibration points should be plotted as crosshairs. Deactivating the crosshairs will result in the size of the canvas adapting to the shapes. Can be especially usefull for small shapes or debugging. - - fig_size: Defaults to :math:`(10, 10)` Controls the size of the matplotlib figure. See `matplotlib documentation `_ for more information. - - apply_orientation_transform: Define wether the orientation transform should be applied before plotting. - - save_name (Optional[str], default: None): Specify a filename for saving the generated figure. By default `None` is provided which will not save a figure. - """ - - modes = ["line", "dots"] - - # Check if Collection scale should be applied or not - if apply_scale: - scale = self.scale - else: - scale = 1 - - if mode not in modes: - raise ValueError("Mode not known. Please use one of the following plotting modes: line, dots") - - # close current figures - plt.clf() - plt.cla() - plt.close("all") - - fig, ax = plt.subplots(figsize=fig_size, **kwargs) - - # Plot calibration points - if calibration and self.calibration_points is not None: - # Apply orientation transform as default behavior - if apply_orientation_transform: - calibration_points = self.calibration_points @ self.orientation_transform * scale - else: - calibration_points = self.calibration_points * scale - - plt.scatter(calibration_points[:, 0], calibration_points[:, 1], marker="x") - - for shape in self.shapes: - # Apply orientation transform as default behavior - if apply_orientation_transform: - # Use local transform if defined, else use Collection transform - if shape.orientation_transform is not None: - points = shape.points @ shape.orientation_transform * scale - else: - points = shape.points @ self.orientation_transform * scale - else: - points = shape.points * scale - - if mode == "line": - ax.plot(points[:, 0], points[:, 1]) - - elif mode == "dots": - ax.scatter(points[:, 0], points[:, 1], s=10) - - ax.grid(True) - ax.ticklabel_format(useOffset=False) - ax.set_xlabel("x-axis") - ax.set_ylabel("y-axis") - ax.set_aspect("equal", adjustable="box") - - fig.tight_layout() - - if save_name is not None: - plt.savefig(save_name) - - if return_fig: - return fig - - plt.show() - - def add_shape(self, shape: Shape): - """Add a new shape to the collection. - - Args: - shape: Shape which should be added. - """ - - if isinstance(shape, Shape): - self.shapes.append(shape) - else: - TypeError("Provided shape is not of type Shape") - - def new_shape( - self, - points: np.ndarray, - well: str | None = None, - name: str | None = None, - **custom_attributes, - ): - """Directly create a new Shape in the current collection. - - Args: - points: Array or list of lists in the shape of `(N,2)`. Contains the points of the polygon forming a shape. - - well: Well in which to sort the shape after cutting. For example A1, A2 or B3. - - name: Name of the shape. - - custom_attributes: Custom shape metadata that can be added as additional xml-element to the shape. - All values are converted to strings. - - """ - to_add = Shape( - points, - well=well, - name=name, - orientation_transform=self.orientation_transform, - **custom_attributes, - ) - self.add_shape(to_add) - - def join(self, collection: Collection, update_orientation_transform: bool = True): - """Join the collection with the shapes of a different collection. The calibration markers of the current collection are kept. Please keep in mind that coordinate systems and calibration points must be compatible for correct joining of collections. - - Args: - collection: Collection which should be joined with the current collection object. - orientation_transform: If set to True, the orientation transform of the joined collection will be updated to the current collection. If set to False, the orientation transform of the joined collection will not be updated. - - Returns: - returns self - """ - if not np.all(self.orientation_transform == collection.orientation_transform): - if update_orientation_transform: - shapes = collection.shapes - for shape in shapes: - shape.orientation_transform = self.orientation_transform - else: - warnings.warn( - "Orientation transform of the joined collection is not equal to the current collection, but update_orientation_transform is set to False. Shapes will be merged without updating the orientation transform.", - stacklevel=2, - ) - self.shapes += collection.shapes - - return self - - def to_geopandas(self, *attrs: str) -> gpd.GeoDataFrame: - """Return geopandas dataframe of collection - - Args: - *attrs (str): Optional attributes of the shapes in the collection to be added as metadata columns - - Returns: - geopandas.GeoDataFrame: Representation of all shapes and optional metadata - - Example: - .. code-block:: python - # Generate collection - collection = pylmd.Collection() - shape = pylmd.Shape( - np.array([[ 0, 0], [ 0, -1], [ 1, 0], [ 0, 0]]), - well="A1", - name="Shape_1", - metadata1="A", - metadata2="B", - orientation_transform=None - ) - collection.add_shape(shape) - - # Get geopandas object - collection.to_geopandas() - > geometry - 0 POLYGON ((0 0, 0 -1, 1 0, 0 0)) - - collection.to_geopandas("well", "name", "metadata1", "metadata2") - > well name metadata1 metadata2 geometry - 0 A1 Shape_1 A B POLYGON ((0 0, 0 -1, 1 0, 0 0)) - """ - metadata = ( - pd.DataFrame( - [[shape.get_shape_annotation(att) for att in attrs] for shape in self.shapes], - columns=attrs, - ) - if (attrs is not None) - else None - ) - geometry = [shape.to_shapely() for shape in self.shapes] - - return gpd.GeoDataFrame(data=metadata, geometry=geometry) - - # load xml from file - def load(self, file_location: str, *, raise_shape_errors: bool = False): - """Can be used to load a shape file from XML. Both, XMLs generated with py-lmd and the Leica software can be used. - Args: - file_location: File path pointing to the XML file. - raise_errors: Whether to raise errors during shape collection. If `False` raises a warning. - - """ - - tree = ET.parse(file_location) - root = tree.getroot() - - cal_point_len = 0 - - # count calibration points - for child in root: - if "CalibrationPoint" in child.tag: - cal_point_len += 1 - - self.calibration_points = np.ones((cal_point_len // 2, 2), dtype=int) - - for child in root: - if child.tag == "GlobalCoordinates": - self.global_coordinates = int(child.text) - - # Load calibration points - elif "CalibrationPoint" in child.tag: - axes = child.tag[0] - axes_id = 0 if axes == "X" else 1 - shape_id = int(child.tag[-1]) - 1 - value = int(child.text) - - self.calibration_points[shape_id, axes_id] = value - - # Load shapes - elif "Shape_" in child.tag: - try: - new_shape = Shape.from_xml(child) - self.shapes.append(new_shape) - - except ValueError as e: - if raise_shape_errors: - raise ValueError(e) from e - else: - warnings.warn(str(e), stacklevel=1) - continue - - def load_geopandas( - self, - gdf: gpd.GeoDataFrame, - geometry_column: str = "geometry", - name_column: str | None = None, - well_column: str | None = None, - calibration_points: np.ndarray | None = None, - global_coordinates: int | None = None, - custom_attribute_columns: str | list[str] | None = None, - ) -> None: - """Create collection from a geopandas dataframe - - Args: - gdf (geopandas.GeoDataFrame): Collection of shapes and optional metadata - geometry_column (str, default: geometry): Name of column storing Shapes as `shapely.Polygon`, defaults to geometry - well_column (str, optional): Column storing of well id as additional metadata - calibration_points (np.ndarray, optional): Calibration points of collection - global_coordinates (int, optional): Number of global coordinates - custom_attribute_columns Custom shape metadata that will be added as additional xml-element to the shape. - Can be column name, list of column names or None - - Example: - - .. code-block:: python - - from lmd.lib import Collection - import geopandas as gpd - import shapely - - gdf = gpd.GeoDataFrame( - data={"well": ["A1"], "name": ["test"]}, geometry=[shapely.Polygon([[0, 0], [0, 1], [1, 0], [0, 0]])] - ) - - # Create collection - c = Collection() - - # Export well metadata - c.load_geopandas(gdf, well_column="well") - assert c.to_geopandas("well").equals(gdf) - - # Do not export well metadata - c.load_geopandas(gdf) - assert c.to_geopandas().equals(gdf.drop(columns="well")) - """ - # Update attributes - if calibration_points is not None: - self.calibration_points = calibration_points - if global_coordinates is not None: - self.global_coordinates = global_coordinates - - if custom_attribute_columns is None: - custom_attribute_columns = [] - if isinstance(custom_attribute_columns, str): - custom_attribute_columns = [custom_attribute_columns] - - self.shapes = [ - Shape( - points=np.array(row[geometry_column].exterior.coords), - name=row[name_column] if name_column is not None else None, - well=row[well_column] if well_column is not None else None, - **{att: row[att] for att in custom_attribute_columns}, - ) - for _, row in gdf.iterrows() - ] - - # save xml to file - def save(self, file_location: str, encoding: str = "utf-8"): - """Can be used to save the shape collection as XML file. - - file_location: File path pointing to the XML file. - """ - - root = ET.Element("ImageData") - - # write global coordinates - global_coordinates = ET.SubElement(root, "GlobalCoordinates") - global_coordinates.text = "1" - - # transform calibration points - transformed_calibration_points = self.calibration_points @ self.orientation_transform * self.scale - - # write calibration points - for i, point in enumerate(transformed_calibration_points): - print(point) - - id = i + 1 - x = ET.SubElement(root, f"X_CalibrationPoint_{id}") - x.text = f"{np.floor(point[0]).astype(int)}" - - y = ET.SubElement(root, f"Y_CalibrationPoint_{id}") - y.text = f"{np.floor(point[1]).astype(int)}" - - # write shape length - shape_count = ET.SubElement(root, "ShapeCount") - shape_count.text = f"{len(self.shapes)}" - - # write shapes - for i, shape in enumerate(self.shapes): - id = i + 1 - - # apply Collection orientation_transform and scale - root.append(shape.to_xml(id, self.orientation_transform, self.scale)) - - # write root - tree = ET.ElementTree(element=root) - tree.write(file_location, encoding="utf-8", xml_declaration=True, pretty_print=True) - - def svg_to_lmd( - self, - file_location, - offset=None, - divisor=3, - multiplier=60, - rotation_matrix=np.eye(2), - orientation_transform=None, - ): - """Can be used to save the shape collection as XML file. - - Args: - file_location: File path pointing to the SVG file. - - orientation_transform: Will superseed the global transform of the Collection. - - rotation_matrix: - - """ - - if offset is None: - offset = [0, 0] - orientation_transform = self.orientation_transform if orientation_transform is None else orientation_transform - - svg = SVG.parse(file_location) - list(svg.elements()) - - for path in svg: - pl = [] - n_points = int(path.length() // divisor) - linspace = np.linspace(0, 1, n_points) - - for index in linspace: - poly = np.array(path.point(index)) - pl.append([poly[0], -poly[1]]) - - arr = np.array(pl) @ rotation_matrix * multiplier + offset - - to_add = Shape(points=arr, orientation_transform=orientation_transform) - self.add_shape(to_add) - - -class Shape: - """Class for creating a single shape object.""" - - def __init__( - self, - points: np.ndarray = np.empty((1, 2)), - well: str | None = None, - name: str | None = None, - orientation_transform=None, - **custom_attributes: dict[str, str], - ): - """Class for creating a single shape. - - Args: - points: Array or list of lists in the shape of `(N,2)`. Contains the points of the polygon forming a shape. - - well: Well in which to sort the shape after cutting. For example A1, A2 or B3. - - name: Name of the shape. - - custom_attributes: Custom shape metadata that will be added as additional xml-element to the shape - Values be implicitly converted to strings. - """ - # Orientation transform of shapes - self.orientation_transform: np.ndarray | None = orientation_transform - - # Although a numpy array is recommended, list of lists is accepted - points = np.array(points) - - # Assert correct dimensions - point_shapes = points.shape - if (points.ndim != 2) or (point_shapes[1] != 2): - raise ValueError( - f"Shape {name}: Shape dimensionality is not valid. Please provide a numpy array of shape (N, 2)" - ) - - if len(points) < 3: - raise ValueError( - f"Shape {name}: Valid shape must contain at least 3 points, but only contains {len(points)}" - ) - - self.points: np.ndarray = points - - self.name: str | None = name - self.well: str | None = well - - self.custom_attributes = custom_attributes - - @classmethod - def from_xml(cls, root, orientation_transform: np.ndarray | None = None): - """Load a shape from an XML shape node. Used internally for reading LMD generated XML files. - - Args: - root: XML input node. - """ - name = root.tag - well = None - custom_attributes = {} - - # get number of points - point_count = int(root.find("PointCount").text) - points = np.empty((point_count, 2), dtype=int) - - # compile regex - xpattern = re.compile(r"X_(\d+)") - ypattern = re.compile(r"Y_(\d+)") - - # parse all points - for child in root: - xmatch = re.findall(xpattern, child.tag) - ymatch = re.findall(ypattern, child.tag) - - if xmatch: - point_id = int(xmatch[0]) - 1 - points[point_id, 0] = int(child.text) - elif ymatch: - point_id = int(ymatch[0]) - 1 - points[point_id, 1] = int(child.text) - elif child.tag == "CapID": - well = str(child.text) - else: - if child.tag in custom_attributes: - warnings.warn( - f"Shape attribute {child.tag} already found in shape, overwrite", - stacklevel=1, - ) - custom_attributes[child.tag] = child.text - - points = np.array(points) - - return cls( - points=points, - name=name, - well=well, - orientation_transform=orientation_transform, - **custom_attributes, - ) - - def to_xml( - self, - id: int, - orientation_transform: np.ndarray, - scale: float, - *, - write_custom_attributes: bool = True, - ): - """Generate XML shape node needed internally for export. - - Args: - id: Sequential identifier of the shape as used in the LMD XML format. - - orientation_transform (np.array): Pass orientation_transform which is used if no local orientation transform is set. - - scale (float): Scalling factor used to enable higher decimal precision. - - write_custom_attributes: Write custom attributes to xml file - - Note: - If the Shape has a custom orientation_transform defined, the custom orientation_transform is applied at this point. If not, the oritenation_transform passed by the parent Collection is used. This highlights an important difference between the Shape and Collection class. The Collection will always has an orientation transform defined and will use `np.eye(2)` by default. The Shape object can have a orientation_transform but can also be set to `None` to use the Collection value. - - """ - - # Apply orientation transform. If the Shape has a custom orientation_transform defined, the custom orientation_transform is applied at this point. If not, the oritenation_transform passed by the parent Collection is used. This highlights an important difference between the Shape and Collection class. The Collection will always has an orientation transform defined and will use `np.eye(2)` by default. The Shape object can have a orientation_transform but can also be set to `None` to use the Collection value. - - if self.orientation_transform is not None: - transformed_points = self.points @ self.orientation_transform * scale - else: - transformed_points = self.points @ orientation_transform * scale - - shape = ET.Element(f"Shape_{id}") - - point_count = ET.SubElement(shape, "PointCount") - point_count.text = f"{len(transformed_points)}" - - if self.well is not None: - cap_id = ET.SubElement(shape, "CapID") - cap_id.text = self.well - - if write_custom_attributes: - for attribute_name, attribute_value in self.custom_attributes.items(): - custom_attribute = ET.SubElement(shape, attribute_name) - # xml only accepts string values - custom_attribute.text = str(attribute_value) - - # write points - for i, point in enumerate(transformed_points): - id = i + 1 - x = ET.SubElement(shape, f"X_{id}") - x.text = f"{np.floor(point[0]).astype(int)}" - - y = ET.SubElement(shape, f"Y_{id}") - y.text = f"{np.floor(point[1]).astype(int)}" - - return shape - - def get_shape_annotation(self, name: str) -> Any | None: - """Retrieve the value of an attribute from either instance attributes - or custom attributes by name. - - Searches for the attribute by name in the 1) instance attributes - 2) custom attributes, or 3) issues a warning and returns None - - Args: - name (str): The name of the attribute to retrieve. - - Returns: - Any | None: The value of the attribute if found, otherwise None. - """ - if name in self.__dict__: - return getattr(self, name) - elif name in self.custom_attributes: - return self.custom_attributes.get(name) - else: - warnings.warn(f"Attribute {name} not found in shape attributes. Returning None.", stacklevel=2) - return None - - def to_shapely(self): - return shapely.Polygon(self.points) - - -class SegmentationLoader: - """Select single cells from a segmentation and generate cutting data - - Args: - config (dict): Dict containing configuration parameters. See Note for further explanation. - processes (int): Number of processes used for parallel processing of cell sets. Total processes can be calculated as `processes * threads`. - threads (int): Number of threads used for parallel processing of shapes within a cell set. Total processes can be calculated as `processes * threads`. - - cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. - - calibration_marker (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. - - coords_lookup (None, dict): precalculated lookup table for coordinates of individual cell ids. If not provided will be calculated. - - classes (np.array): Array of classes found in the provided segmentation mask. If not provided will be calculated based on the assumption that cell_ids are assigned in ascending order. - - Example: - - .. code-block:: python - - import numpy as np - from PIL import Image - from lmd.lib import SegmentationLoader - - Args: - config (dict): Dict containing configuration parameters. See Note for further explanation. - - cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. - - calibration_marker (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. - - Example: - - .. code-block:: python - - import numpy as np - from PIL import Image - from lmd.lib import SegmentationLoader - from lmd._utils import _download_segmentation_example_file - - # use example image provided within py-lmd - example_image_path = _download_segmentation_example_file() - im = Image.open(example_image_path) - segmentation = np.array(im).astype(np.uint32) - - all_classes = np.unique(segmentation) - - cell_sets = [{"classes": all_classes, "well": "A1"}] - - calibration_points = np.array([[0, 0], [0, 1000], [1000, 1000]]) - - loader_config = {"orientation_transform": np.array([[0, -1], [1, 0]])} - - sl = SegmentationLoader(config=loader_config) - shape_collection = sl(segmentation, cell_sets, calibration_points) - - shape_collection.plot(fig_size=(10, 10)) - - .. image:: images/segmentation1.png - - Note: - - Basic explanation of the parameters in the config dict: - - .. code-block:: yaml - - # dilation of the cutting mask in pixel before intersecting shapes in a selection group are merged - shape_dilation: 0 - - # erosion of the cutting mask in pixel before intersecting shapes in a selection group are merged - shape_erosion: 0 - - # Cutting masks are transformed by binary dilation and erosion - binary_smoothing: 3 - - # number of datapoints which are averaged for smoothing - # the resoltion of datapoints is twice as high as the resolution of pixel - convolution_smoothing: 15 - - # strength of coordinate reduction through the Ramer-Douglas-Peucker algorithm 0 is small 1 is very high - rdp_epsilon: 0.1 - - # Optimization of the cutting path inbetween shapes - # optimized paths improve the cutting time and the microscopes focus - # valid options are ["none", "hilbert", "greedy"] - path_optimization: "hilbert" - - # Paramter required for hilbert curve based path optimization. - # Defines the order of the hilbert curve used, which needs to be tuned with the total cutting area. - # For areas of 1 x 1 mm we recommend at least p = 4, for whole slides we recommend p = 7. - hilbert_p: 7 - - # Parameter required for greedy path optimization. - # Instead of a global distance matrix, the k nearest neighbours are approximated. - # The optimization problem is then greedily solved for the known set of nearest neighbours until the first set of neighbours is exhausted. - # Established edges are then removed and the nearest neighbour approximation is recursivly repeated. - greedy_k: 20 - - # Overlapping shapes are merged based on a nearest neighbour heuristic. - # All selected shapes closer than distance_heuristic pixel are checked for overlap. - distance_heuristic: 300 - - - """ - - # define all valid path optimization methods used with the "path_optimization" argument in the configuration - VALID_PATH_OPTIMIZERS = ["none", "hilbert", "greedy"] - DEFAULT_SEGMENTATION_DTYPE = np.uint64 - - def __init__(self, config=None, verbose=False, processes=1): - if config is None: - config = {} - self.config = config - self.verbose = verbose - self._get_context() # setup context for multiprocessing function calls to work with different operating systems - - self.register_parameter("shape_dilation", 0) - self.register_parameter("shape_erosion", 0) - self.register_parameter("binary_smoothing", 3) - self.register_parameter("convolution_smoothing", 15) - self.register_parameter("rdp_epsilon", 0.1) - self.register_parameter("path_optimization", "hilbert") - self.register_parameter("greedy_k", 0) - self.register_parameter("hilbert_p", 7) - self.register_parameter("xml_decimal_transform", 100) - self.register_parameter("distance_heuristic", 300) - self.register_parameter("join_intersecting", True) - self.register_parameter("orientation_transform", np.eye(2)) - self.register_parameter("threads", 10) - - self.coords_lookup = None - self.processes = processes - - self._configure_path_optimizer() - - def _configure_path_optimizer(self): - # configure path optimizer - if "path_optimization" in self.config: - optimization_method = self.config["path_optimization"] - else: - optimization_method = "none" - - # check if the optimizer is a valid option - if optimization_method in self.VALID_PATH_OPTIMIZERS: - pathoptimizer = optimization_method - else: - self.log("Path optimizer is no valid option, no optimization will be used.") - pathoptimizer = "none" - - self.log(f"Path optimizer used for XML generation: {optimization_method}") - self.optimization_method = pathoptimizer - - def _get_context(self): - if platform.system() == "Windows": - self.context = "spawn" - elif platform.system() == "Darwin": - self.context = "spawn" - elif platform.system() == "Linux": - self.context = "fork" - - def __call__( - self, - input_segmentation: np.ndarray | None, - cell_sets, - calibration_points, - coords_lookup=None, - classes=np.array([], dtype=np.uint64), - ): - if input_segmentation is None: - assert coords_lookup is not None, "If no input segmentation is provided, a coords_lookup must be provided." - - self.calibration_points = calibration_points - sets = [] - - # iterate over all defined sets, perform sanity checks and load external data - for i, cell_set in enumerate(cell_sets): - self.check_cell_set_sanity(cell_set) - cell_set["classes_loaded"] = self.load_classes(cell_set) - sets.append(cell_set) - self.log(f"cell set {i} passed sanity check") - - if len(sets) < self.processes: - self.processes = len(sets) # reduce number of processes if there are less cell sets than processes - - self.input_segmentation = input_segmentation - - if coords_lookup is None: - self.log("Calculating coordinate locations of all cells.") - # deprecated infavour of more computationally efficient solution - # self.coords_lookup = _create_coord_index(self.input_segmentation, classes = classes) - # self.coords_lookup = {k: np.array(v) for k, v in self.coords_lookup.items()} - self.coords_lookup = _create_coord_index_sparse(self.input_segmentation) - else: - self.log("Loading coordinates from external source") - self.coords_lookup = coords_lookup - - # try multithreading - if self.processes > 1: - self.multi_threading = True - self.log("Processing cell sets in parallel") - args = [] - for i, cell_set in enumerate(cell_sets): - args.append((i, cell_set)) - - collections = _execute_indexed_parallel( - self.generate_cutting_data, - args=args, - tqdm_args={ - "file": sys.stdout, - "disable": not self.verbose, - "desc": "collecting cell sets", - }, - n_threads=self.processes, - ) - else: - print("Processing cell sets in serial") - self.multi_threading = False - collections = [] - for i, cell_set in enumerate(cell_sets): - collections.append(self.generate_cutting_data(i, cell_set)) - - return reduce(lambda a, b: a.join(b), collections) - - def generate_cutting_data(self, i: int, cell_set: dict) -> Collection: - if 0 in cell_set["classes_loaded"]: - cell_set["classes_loaded"] = cell_set["classes_loaded"][cell_set["classes_loaded"] != 0] - warnings.warn("Class 0 is not a valid class and was removed from the cell set", stacklevel=2) - - self.log("Convert label format into coordinate format") - center, length, coords = get_coordinate_form(cell_set["classes_loaded"], self.coords_lookup) - - self.log("Conversion finished, performing sanity check.") - - # Sanity check 1 - if len(center) == len(cell_set["classes_loaded"]): - pass - else: - self.log( - "Check failed, returned lengths do not match cell set.\n Some classes were not found in the segmentation and were therefore removed.\n Please make sure all classes specified are present in your segmentation." - ) - elements_removed = len(cell_set["classes_loaded"]) - len(center) - self.log(f"{elements_removed} classes were not found and therefore removed.") - - # Sanity check 2: for the returned coordinates - if len(center) == len(length): - pass - else: - self.log( - "Check failed, returned lengths do not match. Please check if all classes specified are present in your segmentation" - ) - - # Sanity check 3 - zero_elements = 0 - for el in coords: - if len(el) == 0: - zero_elements += 1 - - if zero_elements <= 2: # allow at most for 2 zero elements (x = 0 and y = 0) - pass - else: - self.log( - "Check failed, returned coordinates contain empty elements. Please check if all classes specified are present in your segmentation" - ) - - if self.config["join_intersecting"]: - center, length, coords = self.merge_dilated_shapes( - center, length, coords, dilation=self.config["shape_dilation"], erosion=self.config["shape_erosion"] - ) - - # Calculate dilation and erosion based on if merging was activated - dilation = ( - self.config["binary_smoothing"] - if self.config["join_intersecting"] - else self.config["binary_smoothing"] + self.config["shape_dilation"] - ) - erosion = ( - self.config["binary_smoothing"] - if self.config["join_intersecting"] - else self.config["binary_smoothing"] + self.config["shape_erosion"] - ) - - if self.config["threads"] == 1: - shapes = [] - for coord in tqdm(coords, desc="creating shapes"): - shapes.append(transform_to_map(coord, dilation=dilation, erosion=erosion, coord_format=False)) - else: - with mp.get_context(self.context).Pool(processes=self.config["threads"]) as pool: - shapes = list( - tqdm( - pool.imap( - partial( - transform_to_map, - erosion=erosion, - dilation=dilation, - coord_format=False, - ), - coords, - ), - total=len(center), - disable=not self.verbose, - desc="creating shapes", - ) - ) - - if self.config["threads"] == 1: - polygons = [] - for shape in tqdm(shapes, desc="calculating polygons"): - polygons.append( - _create_poly( - shape, - smoothing_filter_size=self.config["convolution_smoothing"], - rdp_epsilon=self.config["rdp_epsilon"], - ) - ) - else: - with mp.get_context(self.context).Pool(processes=self.config["threads"]) as pool: - polygons = list( - tqdm( - pool.imap( - partial( - _create_poly, - smoothing_filter_size=self.config["convolution_smoothing"], - rdp_epsilon=self.config["rdp_epsilon"], - ), - shapes, - ), - total=len(center), - disable=not self.verbose, - desc="calculating polygons", - ) - ) - - # perform path optimization to minimize the total distance that the LMD travels during cutting (this improves cutting speed and focus) - center = np.array(center) - unoptimized_length = calc_len(center) - self.log(f"Current path length: {unoptimized_length:,.2f} units") - - if self.optimization_method != "none": - if self.optimization_method == "greedy": - optimized_idx = tsp_greedy_solve(center, k=self.config["greedy_k"]) - - elif self.optimization_method == "hilbert": - optimized_idx = tsp_hilbert_solve(center, p=self.config["hilbert_p"]) - - # update order of centers - center = center[optimized_idx] - self.indexes = optimized_idx - - # calculate optimized path length and optimization factor - optimized_length = calc_len(center) - self.log(f"Optimized path length: {optimized_length:,.2f} units") - - optimization_factor = unoptimized_length / optimized_length - self.log(f"Optimization factor: {optimization_factor:,.1f}x") - else: - self.log("No path optimization used") - optimization_factor = 1 - optimized_idx = list(range(len(center))) - # order list of shapes by the optimized index array - polygons = [x for _, x in sorted(zip(optimized_idx, polygons))] - - # Plot coordinates if in debug mode - if self.verbose: - fig, axs = plt.subplots(1, 1, figsize=(10, 10)) - - if "background_image" in self.config: - axs.imshow(self.config["background_image"]) - - axs.scatter(center[:, 1], center[:, 0], s=1) - - for shape in polygons: - axs.plot(shape[:, 1], shape[:, 0], color="red", linewidth=1) - - axs.scatter( - self.calibration_points[:, 1], - self.calibration_points[:, 0], - color="blue", - ) - axs.plot(center[:, 1], center[:, 0], color="grey") - axs.invert_yaxis() - axs.set_aspect("equal", adjustable="box") - axs.axis("off") - axs.set_title("Final cutting path") - fig.tight_layout() - - if self.multi_threading: - self.log("Plotting shapes in debug mode is not supported in multi-threading mode.") - self.log("Saving plots to disk instead.") - fig.savefig(f"debug_plot_{i}.png") - plt.close(fig) - else: - plt.show(fig) - - # Generate array of marker cross positions - ds = Collection(calibration_points=self.calibration_points, scale=self.config["xml_decimal_transform"]) - ds.orientation_transform = self.config["orientation_transform"] - - for shape in polygons: - # Check if well key is set in cell set definition - if "well" in cell_set: - ds.new_shape(shape, well=cell_set["well"]) - else: - ds.new_shape(shape) - return ds - - def merge_dilated_shapes(self, input_center, input_length, input_coords, dilation=0, erosion=0): - print("Intersecting Shapes will be merged into a single shape.") - - # initialize all shapes and create dilated coordinates - # coordinates are created as complex numbers to facilitate comparison with np.isin - dilated_coords = [] - - if self.config["threads"] == 1: - for coord in tqdm(input_coords, desc="dilating shapes"): - dilated_coords.append(transform_to_map(coord, dilation=dilation)) - - else: - with mp.get_context(self.context).Pool(processes=self.config["threads"]) as pool: - dilated_coords = list( - tqdm( - pool.imap(partial(transform_to_map, dilation=dilation), input_coords), - total=len(input_center), - desc="dilating shapes", - ) - ) - - dilated_coords = [np.apply_along_axis(lambda args: [complex(*args)], 1, d).flatten() for d in dilated_coords] - - # A sparse distance matrix is calculated for all cells which are closer than distance_heuristic - center_arr = np.array(input_center) - center_tree = cKDTree(center_arr) - - sparse_distance = center_tree.sparse_distance_matrix(center_tree, self.config["distance_heuristic"]) - sparse_distance = scipy.sparse.tril(sparse_distance) - - # sparse intersection matrix is calculated based on the sparse distance matrix - intersect_data = [] - for col, row in zip(sparse_distance.col, sparse_distance.row): - # diagonal entries are known to intersect - if col == row: - intersect_data.append(1) - else: - # np.isin is used with the two complex coordinate arrays - do_intersect = np.isin(dilated_coords[col], dilated_coords[row]).any() - # intersect_data uses the same columns and rows as sparse_distance - # if two shapes intersect, an edge is created otherwise, no edge is created. - # zero entries will be dropped later - intersect_data.append(1 if do_intersect else 0) - - # create sparse intersection matrix and drop zero elements - sparse_intersect = scipy.sparse.coo_matrix((intersect_data, (sparse_distance.row, sparse_distance.col))) - sparse_intersect.eliminate_zeros() - - # create networkx graph from sparse intersection matrix - g = nx.from_scipy_sparse_array(sparse_intersect) - - # find unconnected subgraphs - # to_merge contains a list of lists with indexes pointing to shapes to be merged - to_merge = [list(g.subgraph(c).nodes()) for c in nx.connected_components(g)] - - output_center = [] - output_length = [] - output_coords = [] - - # merge coords - for new_shape in to_merge: - coords = [] - - for idx in new_shape: - coords.append(dilated_coords[idx]) - - coords_complex = np.concatenate(coords) - coords_complex = np.unique(coords_complex) - coords_2d = np.array([coords_complex.real, coords_complex.imag], dtype=int).T - - # calculate properties length and center from coords - new_center = np.mean(coords_2d, axis=0) - new_len = len(coords_2d) - - # append values to output lists - output_center.append(new_center) - output_length.append(new_len) - output_coords.append(coords_2d) - - print( - len(to_merge) - len(output_center), - "shapes that were intersecting were found and merged.", - ) - return output_center, output_length, output_coords - - def check_cell_set_sanity(self, cell_set): - """Check if cell_set dictionary contains the right keys""" - - if "classes" in cell_set: - if not isinstance(cell_set["classes"], (list, str, np.ndarray)): - self.log("No list of classes specified for cell set") - raise TypeError("No list of classes specified for cell set") - else: - self.log("No classes specified for cell set") - raise KeyError("No classes specified for cell set") - - if "well" in cell_set: - if not isinstance(cell_set["well"], str): - self.log("No well of type str specified for cell set") - raise TypeError("No well of type str specified for cell set") - - def load_classes(self, cell_set): - """Identify cell class definition and load classes - - Identify if cell classes are provided as list of integers or as path pointing to a csv file. - Depending on the type of the cell set, the classes are loaded and returned for selection. - """ - if isinstance(cell_set["classes"], list): - return cell_set["classes"] - - if isinstance(cell_set["classes"], np.ndarray): - if np.issubdtype(cell_set["classes"].dtype.type, np.integer): - return cell_set["classes"] - - if isinstance(cell_set["classes"], str): - # If the path is relative, it is interpreted relative to the project directory - if os.path.isabs(cell_set["classes"]): - path = cell_set["classes"] - else: - path = os.path.join(Path(self.directory).parents[0], cell_set["classes"]) - - if os.path.isfile(path): - try: - cr = csv.reader(open(path)) - filtered_classes = np.array([int(el[0]) for el in list(cr)], dtype="int64") - self.log(f"Loaded {len(filtered_classes)} classes from csv") - return filtered_classes - except (ValueError, TypeError) as e: - self.log(f"CSV file could not be converted to list of integers: {path}") - raise ValueError() from e - else: - self.log("Path containing classes could not be read: {path}") - raise ValueError() - else: - self.log( - "classes argument for a cell set needs to be a list of integer ids or a path pointing to a csv of integer ids." - ) - raise TypeError( - "classes argument for a cell set needs to be a list of integer ids or a path pointing to a csv of integer ids." - ) - - def log(self, msg): - if self.verbose: - print(msg) - - def register_parameter(self, key, value): - if isinstance(key, str): - config_handle = self.config - - elif isinstance(key, list): - raise NotImplementedError("registration of parameters is not yet supported for nested parameters") - - else: - raise TypeError("Key musst be of string or a list of strings") - - if key not in config_handle: - self.log(f"No configuration for {key} found, parameter will be set to {value}") - config_handle[key] = value - - -def transform_to_map(coords, dilation=0, erosion=0, coord_format=True, debug=False): - # safety boundary which extands the generated map size - safety_offset = 3 - dilation_offset = int(dilation) - - coords = np.array(coords).astype(int) - - # top left offset used for creating the offset map - if coords.size == 0: - raise ValueError("coords array is empty; cannot compute minimum.") - offset = np.min(coords, axis=0) - safety_offset - dilation_offset - mat = np.array([offset, [0, 0]]) - offset = np.max(mat, axis=0) - - offset_coords = coords - offset - offset_coords = offset_coords.astype(np.uint) - - offset_map_size = np.max(offset_coords, axis=0) + 2 * safety_offset + dilation_offset - offset_map_size = offset_map_size.astype(np.uint) - - offset_map = np.zeros(offset_map_size, dtype=np.ubyte) - - y = tuple(offset_coords.T[0]) - x = tuple(offset_coords.T[1]) - - offset_map[(y, x)] = 1 - - if debug: - plt.imshow(offset_map) - plt.show() - - offset_map = binary_dilation(offset_map, footprint=disk(dilation)) - offset_map = binary_erosion(offset_map, footprint=disk(erosion)) - offset_map = ndimage.binary_fill_holes(offset_map).astype(int) - - if debug: - plt.imshow(offset_map) - plt.show() - - # coord_format will return a sparse format of [[2, 1],[1, 2],[0, 2]] - # otherwise will return a dense matrix and the offset [[0, 0, 1],[0, 0, 1],[0, 1, 0]] - - if coord_format: - idx_local = np.argwhere(offset_map == 1) - idx_global = idx_local + offset - return idx_global - else: - return (offset_map, offset) - - -def _create_poly( - in_tuple, - smoothing_filter_size: int = 12, - rdp_epsilon: float = 0, - debug: bool = False, -): - """Converts a list of pixels into a polygon. - Args - smoothing_filter_size (int, default = 12): The smoothing filter is the circular convolution with a vector of length smoothing_filter_size and all elements 1 / smoothing_filter_size. - - rdp_epsilon (float, default = 0 ): When compression is wanted, this specifies the epsilon value for the Ramer-Douglas-Peucker algorithm. Higher values will result in more compression. - - dilation (int, default = 0): Binary dilation used before polygon creation for increasing the mask size. This Dilation ignores potential neighbours. Neighbour aware dilation of segmentation mask needs to be defined during segmentation. - """ - (offset_map, offset) = in_tuple - - # find polygon bounds from mask - bounds = find_boundaries(offset_map, connectivity=1, mode="subpixel", background=0) - - edges = np.array(np.where(bounds == 1)) / 2 - edges = edges.T - edges = _sort_edges(edges) - - # smoothing resulting shape - smk = np.ones((smoothing_filter_size, 1)) / smoothing_filter_size - edges = convolve2d(edges, smk, mode="full", boundary="wrap") - - # compression of the resulting polygon - poly = rdp(edges, epsilon=rdp_epsilon) # Ramer-Douglas-Peucker algorithm for polygon simplification - - # debuging - """ - print(self.poly.shape) - fig = plt.figure(frameon=False) - fig.set_size_inches(10,10) - ax = plt.Axes(fig, [0., 0., 1., 1.]) - ax.set_axis_off() - fig.add_axes(ax) - ax.imshow(bounds) - ax.plot(edges[:,1]*2,edges[:,0]*2) - ax.plot(self.poly[:,1]*2,self.poly[:,0]*2) - """ - - return poly + offset - - -def _sort_edges(edges): - """Sorts the vertices of the polygon. - Greedy sorting is performed, might have difficulties with complex shapes. - - """ - - it = len(edges) - new = [] - new.append(edges[0]) - - edges = np.delete(edges, 0, 0) - - for i in range(1, it): - old = np.array(new[i - 1]) - - dist = np.linalg.norm(edges - old, axis=1) - - min_index = np.argmin(dist) - new.append(edges[min_index]) - edges = np.delete(edges, min_index, 0) - - return np.array(new) diff --git a/src/lmd/lib/__init__.py b/src/lmd/lib/__init__.py new file mode 100644 index 0000000..d2c4763 --- /dev/null +++ b/src/lmd/lib/__init__.py @@ -0,0 +1,4 @@ +"""Core logic of the library""" + +from ._geom import Collection, Shape +from ._segmentation import SegmentationLoader diff --git a/src/lmd/lib/_geom.py b/src/lmd/lib/_geom.py new file mode 100644 index 0000000..9436888 --- /dev/null +++ b/src/lmd/lib/_geom.py @@ -0,0 +1,658 @@ +"""Core geometrical objects""" + +from __future__ import annotations + +import re + +# import warnings +import warnings +from typing import Any + +import geopandas as gpd +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import shapely +from lxml import etree as ET +from svgelements import SVG + + +class Shape: + """Class for creating a single shape object.""" + + def __init__( + self, + points: np.ndarray = np.empty((1, 2)), + well: str | None = None, + name: str | None = None, + orientation_transform=None, + **custom_attributes: dict[str, str], + ): + """Class for creating a single shape. + + Args: + points: Array or list of lists in the shape of `(N,2)`. Contains the points of the polygon forming a shape. + + well: Well in which to sort the shape after cutting. For example A1, A2 or B3. + + name: Name of the shape. + + custom_attributes: Custom shape metadata that will be added as additional xml-element to the shape + Values be implicitly converted to strings. + """ + # Orientation transform of shapes + self.orientation_transform: np.ndarray | None = orientation_transform + + # Although a numpy array is recommended, list of lists is accepted + points = np.array(points) + + # Assert correct dimensions + point_shapes = points.shape + if (points.ndim != 2) or (point_shapes[1] != 2): + raise ValueError( + f"Shape {name}: Shape dimensionality is not valid. Please provide a numpy array of shape (N, 2)" + ) + + if len(points) < 3: + raise ValueError( + f"Shape {name}: Valid shape must contain at least 3 points, but only contains {len(points)}" + ) + + self.points: np.ndarray = points + + self.name: str | None = name + self.well: str | None = well + + self.custom_attributes = custom_attributes + + @classmethod + def from_xml(cls, root, orientation_transform: np.ndarray | None = None): + """Load a shape from an XML shape node. Used internally for reading LMD generated XML files. + + Args: + root: XML input node. + """ + name = root.tag + well = None + custom_attributes = {} + + # get number of points + point_count = int(root.find("PointCount").text) + points = np.empty((point_count, 2), dtype=int) + + # compile regex + xpattern = re.compile(r"X_(\d+)") + ypattern = re.compile(r"Y_(\d+)") + + # parse all points + for child in root: + xmatch = re.findall(xpattern, child.tag) + ymatch = re.findall(ypattern, child.tag) + + if xmatch: + point_id = int(xmatch[0]) - 1 + points[point_id, 0] = int(child.text) + elif ymatch: + point_id = int(ymatch[0]) - 1 + points[point_id, 1] = int(child.text) + elif child.tag == "CapID": + well = str(child.text) + else: + if child.tag in custom_attributes: + warnings.warn( + f"Shape attribute {child.tag} already found in shape, overwrite", + stacklevel=1, + ) + custom_attributes[child.tag] = child.text + + points = np.array(points) + + return cls( + points=points, + name=name, + well=well, + orientation_transform=orientation_transform, + **custom_attributes, + ) + + def to_xml( + self, + id: int, + orientation_transform: np.ndarray, + scale: float, + *, + write_custom_attributes: bool = True, + ): + """Generate XML shape node needed internally for export. + + Args: + id: Sequential identifier of the shape as used in the LMD XML format. + + orientation_transform (np.array): Pass orientation_transform which is used if no local orientation transform is set. + + scale (float): Scalling factor used to enable higher decimal precision. + + write_custom_attributes: Write custom attributes to xml file + + Note: + If the Shape has a custom orientation_transform defined, the custom orientation_transform is applied at this point. If not, the oritenation_transform passed by the parent Collection is used. This highlights an important difference between the Shape and Collection class. The Collection will always has an orientation transform defined and will use `np.eye(2)` by default. The Shape object can have a orientation_transform but can also be set to `None` to use the Collection value. + + """ + + # Apply orientation transform. If the Shape has a custom orientation_transform defined, the custom orientation_transform is applied at this point. If not, the oritenation_transform passed by the parent Collection is used. This highlights an important difference between the Shape and Collection class. The Collection will always has an orientation transform defined and will use `np.eye(2)` by default. The Shape object can have a orientation_transform but can also be set to `None` to use the Collection value. + + if self.orientation_transform is not None: + transformed_points = self.points @ self.orientation_transform * scale + else: + transformed_points = self.points @ orientation_transform * scale + + shape = ET.Element(f"Shape_{id}") + + point_count = ET.SubElement(shape, "PointCount") + point_count.text = f"{len(transformed_points)}" + + if self.well is not None: + cap_id = ET.SubElement(shape, "CapID") + cap_id.text = self.well + + if write_custom_attributes: + for attribute_name, attribute_value in self.custom_attributes.items(): + custom_attribute = ET.SubElement(shape, attribute_name) + # xml only accepts string values + custom_attribute.text = str(attribute_value) + + # write points + for i, point in enumerate(transformed_points): + id = i + 1 + x = ET.SubElement(shape, f"X_{id}") + x.text = f"{np.floor(point[0]).astype(int)}" + + y = ET.SubElement(shape, f"Y_{id}") + y.text = f"{np.floor(point[1]).astype(int)}" + + return shape + + def get_shape_annotation(self, name: str) -> Any | None: + """Retrieve the value of an attribute from either instance attributes + or custom attributes by name. + + Searches for the attribute by name in the 1) instance attributes + 2) custom attributes, or 3) issues a warning and returns None + + Args: + name (str): The name of the attribute to retrieve. + + Returns: + Any | None: The value of the attribute if found, otherwise None. + """ + if name in self.__dict__: + return getattr(self, name) + elif name in self.custom_attributes: + return self.custom_attributes.get(name) + else: + warnings.warn(f"Attribute {name} not found in shape attributes. Returning None.", stacklevel=2) + return None + + def to_shapely(self): + return shapely.Polygon(self.points) + + +class Collection: + """Class which is used for creating shape collections for the Leica LMD6 & 7. Contains a coordinate system defined by calibration points and a collection of various shapes. + + Args: + calibration_points: Calibration coordinates in the form of :math:`(3, 2)`. + orientation_transform: defines transformations performed on the provided coordinate system prior to export as XML. Defaults to the identity matrix. + + Attributes: + shapes (List[Shape]): Contains all shapes which are part of the collection. + calibration_points (Optional[np.ndarray]): Calibration coordinates in the form of :math:`(3, 2)`. + orientation_transform (np.ndarray): defines transformations performed on the provided coordinate system prior to export as XML. This orientation_transform is always applied to shapes when there is no individual orientation_transform provided. + """ + + def __init__( + self, + calibration_points: np.ndarray | None = None, + orientation_transform: np.ndarray | None = None, + scale: float = 100, + ): + self.shapes: list[Shape] = [] + + self.calibration_points: np.ndarray | None = calibration_points + + if orientation_transform is None: + orientation_transform = np.eye(2) # assign default value + + self.orientation_transform: np.ndarray = orientation_transform + + self.scale: float = scale + + self.global_coordinates = 1 + + def stats(self): + """Print statistics about the Collection in the form of: + + .. code-block:: + + ===== Collection Stats ===== + Number of shapes: 208 + Number of vertices: 126,812 + ============================ + Mean vertices: 609.67 + Min vertices: 220.00 + 5% percentile vertices: 380.20 + Median vertices: 594.00 + 95% percentile vertices: 893.20 + Max vertices: 1,300.00 + + """ + lengths = np.array([len(shape.points) for shape in self.shapes]) + + num_shapes = len(self.shapes) + num_vertices = np.sum(lengths) + + median_dp = np.median(lengths).astype(float) + mean_dp = np.mean(lengths).astype(float) + max_dp = np.max(lengths).astype(float) + min_dp = np.min(lengths).astype(float) + percentile_5 = np.percentile(lengths, 5).astype(float) + percentile_95 = np.percentile(lengths, 95).astype(float) + + print("===== Collection Stats =====") + print(f"Number of shapes: {num_shapes:,}") + print(f"Number of vertices: {num_vertices:,}") + print("============================") + print(f"Mean vertices: {mean_dp:,.0f}") + print(f"Min vertices: {min_dp:,.0f}") + print(f"5% percentile vertices: {percentile_5:,.0f}") + print(f"Median vertices: {median_dp:,.0f}") + print(f"95% percentile vertices: {percentile_95:,.0f}") + print(f"Max vertices: {max_dp:,.0f}") + + def plot( + self, + calibration: bool = True, + mode: str = "line", + fig_size: tuple = (5, 5), + apply_orientation_transform: bool = True, + apply_scale: bool = False, + save_name: str | None = None, + return_fig: bool = False, + **kwargs, + ): + """This function can be used to plot all shapes of the corresponding shape collection. + + Args: + calibration: Controls wether the calibration points should be plotted as crosshairs. Deactivating the crosshairs will result in the size of the canvas adapting to the shapes. Can be especially usefull for small shapes or debugging. + + fig_size: Defaults to :math:`(10, 10)` Controls the size of the matplotlib figure. See `matplotlib documentation `_ for more information. + + apply_orientation_transform: Define wether the orientation transform should be applied before plotting. + + save_name (Optional[str], default: None): Specify a filename for saving the generated figure. By default `None` is provided which will not save a figure. + """ + + modes = ["line", "dots"] + + # Check if Collection scale should be applied or not + if apply_scale: + scale = self.scale + else: + scale = 1 + + if mode not in modes: + raise ValueError("Mode not known. Please use one of the following plotting modes: line, dots") + + # close current figures + plt.clf() + plt.cla() + plt.close("all") + + fig, ax = plt.subplots(figsize=fig_size, **kwargs) + + # Plot calibration points + if calibration and self.calibration_points is not None: + # Apply orientation transform as default behavior + if apply_orientation_transform: + calibration_points = self.calibration_points @ self.orientation_transform * scale + else: + calibration_points = self.calibration_points * scale + + plt.scatter(calibration_points[:, 0], calibration_points[:, 1], marker="x") + + for shape in self.shapes: + # Apply orientation transform as default behavior + if apply_orientation_transform: + # Use local transform if defined, else use Collection transform + if shape.orientation_transform is not None: + points = shape.points @ shape.orientation_transform * scale + else: + points = shape.points @ self.orientation_transform * scale + else: + points = shape.points * scale + + if mode == "line": + ax.plot(points[:, 0], points[:, 1]) + + elif mode == "dots": + ax.scatter(points[:, 0], points[:, 1], s=10) + + ax.grid(True) + ax.ticklabel_format(useOffset=False) + ax.set_xlabel("x-axis") + ax.set_ylabel("y-axis") + ax.set_aspect("equal", adjustable="box") + + fig.tight_layout() + + if save_name is not None: + plt.savefig(save_name) + + if return_fig: + return fig + + plt.show() + + def add_shape(self, shape: Shape): + """Add a new shape to the collection. + + Args: + shape: Shape which should be added. + """ + + if isinstance(shape, Shape): + self.shapes.append(shape) + else: + TypeError("Provided shape is not of type Shape") + + def new_shape( + self, + points: np.ndarray, + well: str | None = None, + name: str | None = None, + **custom_attributes, + ): + """Directly create a new Shape in the current collection. + + Args: + points: Array or list of lists in the shape of `(N,2)`. Contains the points of the polygon forming a shape. + + well: Well in which to sort the shape after cutting. For example A1, A2 or B3. + + name: Name of the shape. + + custom_attributes: Custom shape metadata that can be added as additional xml-element to the shape. + All values are converted to strings. + + """ + to_add = Shape( + points, + well=well, + name=name, + orientation_transform=self.orientation_transform, + **custom_attributes, + ) + self.add_shape(to_add) + + def join(self, collection: Collection, update_orientation_transform: bool = True): + """Join the collection with the shapes of a different collection. The calibration markers of the current collection are kept. Please keep in mind that coordinate systems and calibration points must be compatible for correct joining of collections. + + Args: + collection: Collection which should be joined with the current collection object. + orientation_transform: If set to True, the orientation transform of the joined collection will be updated to the current collection. If set to False, the orientation transform of the joined collection will not be updated. + + Returns: + returns self + """ + if not np.all(self.orientation_transform == collection.orientation_transform): + if update_orientation_transform: + shapes = collection.shapes + for shape in shapes: + shape.orientation_transform = self.orientation_transform + else: + warnings.warn( + "Orientation transform of the joined collection is not equal to the current collection, but update_orientation_transform is set to False. Shapes will be merged without updating the orientation transform.", + stacklevel=2, + ) + self.shapes += collection.shapes + + return self + + def to_geopandas(self, *attrs: str) -> gpd.GeoDataFrame: + """Return geopandas dataframe of collection + + Args: + *attrs (str): Optional attributes of the shapes in the collection to be added as metadata columns + + Returns: + geopandas.GeoDataFrame: Representation of all shapes and optional metadata + + Example: + .. code-block:: python + # Generate collection + collection = pylmd.Collection() + shape = pylmd.Shape( + np.array([[ 0, 0], [ 0, -1], [ 1, 0], [ 0, 0]]), + well="A1", + name="Shape_1", + metadata1="A", + metadata2="B", + orientation_transform=None + ) + collection.add_shape(shape) + + # Get geopandas object + collection.to_geopandas() + > geometry + 0 POLYGON ((0 0, 0 -1, 1 0, 0 0)) + + collection.to_geopandas("well", "name", "metadata1", "metadata2") + > well name metadata1 metadata2 geometry + 0 A1 Shape_1 A B POLYGON ((0 0, 0 -1, 1 0, 0 0)) + """ + metadata = ( + pd.DataFrame( + [[shape.get_shape_annotation(att) for att in attrs] for shape in self.shapes], + columns=attrs, + ) + if (attrs is not None) + else None + ) + geometry = [shape.to_shapely() for shape in self.shapes] + + return gpd.GeoDataFrame(data=metadata, geometry=geometry) + + # load xml from file + def load(self, file_location: str, *, raise_shape_errors: bool = False): + """Can be used to load a shape file from XML. Both, XMLs generated with py-lmd and the Leica software can be used. + Args: + file_location: File path pointing to the XML file. + raise_errors: Whether to raise errors during shape collection. If `False` raises a warning. + + """ + + tree = ET.parse(file_location) + root = tree.getroot() + + cal_point_len = 0 + + # count calibration points + for child in root: + if "CalibrationPoint" in child.tag: + cal_point_len += 1 + + self.calibration_points = np.ones((cal_point_len // 2, 2), dtype=int) + + for child in root: + if child.tag == "GlobalCoordinates": + self.global_coordinates = int(child.text) + + # Load calibration points + elif "CalibrationPoint" in child.tag: + axes = child.tag[0] + axes_id = 0 if axes == "X" else 1 + shape_id = int(child.tag[-1]) - 1 + value = int(child.text) + + self.calibration_points[shape_id, axes_id] = value + + # Load shapes + elif "Shape_" in child.tag: + try: + new_shape = Shape.from_xml(child) + self.shapes.append(new_shape) + + except ValueError as e: + if raise_shape_errors: + raise ValueError(e) from e + else: + warnings.warn(str(e), stacklevel=1) + continue + + def load_geopandas( + self, + gdf: gpd.GeoDataFrame, + geometry_column: str = "geometry", + name_column: str | None = None, + well_column: str | None = None, + calibration_points: np.ndarray | None = None, + global_coordinates: int | None = None, + custom_attribute_columns: str | list[str] | None = None, + ) -> None: + """Create collection from a geopandas dataframe + + Args: + gdf (geopandas.GeoDataFrame): Collection of shapes and optional metadata + geometry_column (str, default: geometry): Name of column storing Shapes as `shapely.Polygon`, defaults to geometry + well_column (str, optional): Column storing of well id as additional metadata + calibration_points (np.ndarray, optional): Calibration points of collection + global_coordinates (int, optional): Number of global coordinates + custom_attribute_columns Custom shape metadata that will be added as additional xml-element to the shape. + Can be column name, list of column names or None + + Example: + + .. code-block:: python + + from lmd.lib import Collection + import geopandas as gpd + import shapely + + gdf = gpd.GeoDataFrame( + data={"well": ["A1"], "name": ["test"]}, geometry=[shapely.Polygon([[0, 0], [0, 1], [1, 0], [0, 0]])] + ) + + # Create collection + c = Collection() + + # Export well metadata + c.load_geopandas(gdf, well_column="well") + assert c.to_geopandas("well").equals(gdf) + + # Do not export well metadata + c.load_geopandas(gdf) + assert c.to_geopandas().equals(gdf.drop(columns="well")) + """ + # Update attributes + if calibration_points is not None: + self.calibration_points = calibration_points + if global_coordinates is not None: + self.global_coordinates = global_coordinates + + if custom_attribute_columns is None: + custom_attribute_columns = [] + if isinstance(custom_attribute_columns, str): + custom_attribute_columns = [custom_attribute_columns] + + self.shapes = [ + Shape( + points=np.array(row[geometry_column].exterior.coords), + name=row[name_column] if name_column is not None else None, + well=row[well_column] if well_column is not None else None, + **{att: row[att] for att in custom_attribute_columns}, + ) + for _, row in gdf.iterrows() + ] + + # save xml to file + def save(self, file_location: str, encoding: str = "utf-8"): + """Can be used to save the shape collection as XML file. + + file_location: File path pointing to the XML file. + """ + + root = ET.Element("ImageData") + + # write global coordinates + global_coordinates = ET.SubElement(root, "GlobalCoordinates") + global_coordinates.text = "1" + + # transform calibration points + transformed_calibration_points = self.calibration_points @ self.orientation_transform * self.scale + + # write calibration points + for i, point in enumerate(transformed_calibration_points): + print(point) + + id = i + 1 + x = ET.SubElement(root, f"X_CalibrationPoint_{id}") + x.text = f"{np.floor(point[0]).astype(int)}" + + y = ET.SubElement(root, f"Y_CalibrationPoint_{id}") + y.text = f"{np.floor(point[1]).astype(int)}" + + # write shape length + shape_count = ET.SubElement(root, "ShapeCount") + shape_count.text = f"{len(self.shapes)}" + + # write shapes + for i, shape in enumerate(self.shapes): + id = i + 1 + + # apply Collection orientation_transform and scale + root.append(shape.to_xml(id, self.orientation_transform, self.scale)) + + # write root + tree = ET.ElementTree(element=root) + tree.write(file_location, encoding="utf-8", xml_declaration=True, pretty_print=True) + + def svg_to_lmd( + self, + file_location, + offset=None, + divisor=3, + multiplier=60, + rotation_matrix=np.eye(2), + orientation_transform=None, + ): + """Can be used to save the shape collection as XML file. + + Args: + file_location: File path pointing to the SVG file. + + orientation_transform: Will superseed the global transform of the Collection. + + rotation_matrix: + + """ + + if offset is None: + offset = [0, 0] + orientation_transform = self.orientation_transform if orientation_transform is None else orientation_transform + + svg = SVG.parse(file_location) + list(svg.elements()) + + for path in svg: + pl = [] + n_points = int(path.length() // divisor) + linspace = np.linspace(0, 1, n_points) + + for index in linspace: + poly = np.array(path.point(index)) + pl.append([poly[0], -poly[1]]) + + arr = np.array(pl) @ rotation_matrix * multiplier + offset + + to_add = Shape(points=arr, orientation_transform=orientation_transform) + self.add_shape(to_add) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py new file mode 100644 index 0000000..fb29d30 --- /dev/null +++ b/src/lmd/lib/_segmentation.py @@ -0,0 +1,595 @@ +from __future__ import annotations + +import csv +import multiprocessing as mp +import os +import platform +import sys + +# import warnings +import warnings +from functools import partial, reduce +from pathlib import Path + +import matplotlib.pyplot as plt +import networkx as nx +import numpy as np +import scipy +from scipy.spatial import cKDTree +from tqdm.auto import tqdm + +from lmd.segmentation import ( + _create_coord_index_sparse, + calc_len, + get_coordinate_form, + tsp_greedy_solve, + tsp_hilbert_solve, +) + +from ._geom import Collection +from ._utils import _create_poly, _execute_indexed_parallel, transform_to_map + + +class SegmentationLoader: + """Select single cells from a segmentation and generate cutting data + + Args: + config (dict): Dict containing configuration parameters. See Note for further explanation. + processes (int): Number of processes used for parallel processing of cell sets. Total processes can be calculated as `processes * threads`. + threads (int): Number of threads used for parallel processing of shapes within a cell set. Total processes can be calculated as `processes * threads`. + + cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. + + calibration_marker (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. + + coords_lookup (None, dict): precalculated lookup table for coordinates of individual cell ids. If not provided will be calculated. + + classes (np.array): Array of classes found in the provided segmentation mask. If not provided will be calculated based on the assumption that cell_ids are assigned in ascending order. + + Example: + + .. code-block:: python + + import numpy as np + from PIL import Image + from lmd.lib import SegmentationLoader + + Args: + config (dict): Dict containing configuration parameters. See Note for further explanation. + + cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. + + calibration_marker (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. + + Example: + + .. code-block:: python + + import numpy as np + from PIL import Image + from lmd.lib import SegmentationLoader + from lmd._utils import _download_segmentation_example_file + + # use example image provided within py-lmd + example_image_path = _download_segmentation_example_file() + im = Image.open(example_image_path) + segmentation = np.array(im).astype(np.uint32) + + all_classes = np.unique(segmentation) + + cell_sets = [{"classes": all_classes, "well": "A1"}] + + calibration_points = np.array([[0, 0], [0, 1000], [1000, 1000]]) + + loader_config = {"orientation_transform": np.array([[0, -1], [1, 0]])} + + sl = SegmentationLoader(config=loader_config) + shape_collection = sl(segmentation, cell_sets, calibration_points) + + shape_collection.plot(fig_size=(10, 10)) + + .. image:: images/segmentation1.png + + Note: + + Basic explanation of the parameters in the config dict: + + .. code-block:: yaml + + # dilation of the cutting mask in pixel before intersecting shapes in a selection group are merged + shape_dilation: 0 + + # erosion of the cutting mask in pixel before intersecting shapes in a selection group are merged + shape_erosion: 0 + + # Cutting masks are transformed by binary dilation and erosion + binary_smoothing: 3 + + # number of datapoints which are averaged for smoothing + # the resoltion of datapoints is twice as high as the resolution of pixel + convolution_smoothing: 15 + + # strength of coordinate reduction through the Ramer-Douglas-Peucker algorithm 0 is small 1 is very high + rdp_epsilon: 0.1 + + # Optimization of the cutting path inbetween shapes + # optimized paths improve the cutting time and the microscopes focus + # valid options are ["none", "hilbert", "greedy"] + path_optimization: "hilbert" + + # Paramter required for hilbert curve based path optimization. + # Defines the order of the hilbert curve used, which needs to be tuned with the total cutting area. + # For areas of 1 x 1 mm we recommend at least p = 4, for whole slides we recommend p = 7. + hilbert_p: 7 + + # Parameter required for greedy path optimization. + # Instead of a global distance matrix, the k nearest neighbours are approximated. + # The optimization problem is then greedily solved for the known set of nearest neighbours until the first set of neighbours is exhausted. + # Established edges are then removed and the nearest neighbour approximation is recursivly repeated. + greedy_k: 20 + + # Overlapping shapes are merged based on a nearest neighbour heuristic. + # All selected shapes closer than distance_heuristic pixel are checked for overlap. + distance_heuristic: 300 + + + """ + + # define all valid path optimization methods used with the "path_optimization" argument in the configuration + VALID_PATH_OPTIMIZERS = ["none", "hilbert", "greedy"] + DEFAULT_SEGMENTATION_DTYPE = np.uint64 + + def __init__(self, config=None, verbose=False, processes=1): + if config is None: + config = {} + self.config = config + self.verbose = verbose + self._get_context() # setup context for multiprocessing function calls to work with different operating systems + + self.register_parameter("shape_dilation", 0) + self.register_parameter("shape_erosion", 0) + self.register_parameter("binary_smoothing", 3) + self.register_parameter("convolution_smoothing", 15) + self.register_parameter("rdp_epsilon", 0.1) + self.register_parameter("path_optimization", "hilbert") + self.register_parameter("greedy_k", 0) + self.register_parameter("hilbert_p", 7) + self.register_parameter("xml_decimal_transform", 100) + self.register_parameter("distance_heuristic", 300) + self.register_parameter("join_intersecting", True) + self.register_parameter("orientation_transform", np.eye(2)) + self.register_parameter("threads", 10) + + self.coords_lookup = None + self.processes = processes + + self._configure_path_optimizer() + + def _configure_path_optimizer(self): + # configure path optimizer + if "path_optimization" in self.config: + optimization_method = self.config["path_optimization"] + else: + optimization_method = "none" + + # check if the optimizer is a valid option + if optimization_method in self.VALID_PATH_OPTIMIZERS: + pathoptimizer = optimization_method + else: + self.log("Path optimizer is no valid option, no optimization will be used.") + pathoptimizer = "none" + + self.log(f"Path optimizer used for XML generation: {optimization_method}") + self.optimization_method = pathoptimizer + + def _get_context(self): + if platform.system() == "Windows": + self.context = "spawn" + elif platform.system() == "Darwin": + self.context = "spawn" + elif platform.system() == "Linux": + self.context = "fork" + + def __call__( + self, + input_segmentation: np.ndarray | None, + cell_sets, + calibration_points, + coords_lookup=None, + classes=np.array([], dtype=np.uint64), + ): + if input_segmentation is None: + assert coords_lookup is not None, "If no input segmentation is provided, a coords_lookup must be provided." + + self.calibration_points = calibration_points + sets = [] + + # iterate over all defined sets, perform sanity checks and load external data + for i, cell_set in enumerate(cell_sets): + self.check_cell_set_sanity(cell_set) + cell_set["classes_loaded"] = self.load_classes(cell_set) + sets.append(cell_set) + self.log(f"cell set {i} passed sanity check") + + if len(sets) < self.processes: + self.processes = len(sets) # reduce number of processes if there are less cell sets than processes + + self.input_segmentation = input_segmentation + + if coords_lookup is None: + self.log("Calculating coordinate locations of all cells.") + # deprecated infavour of more computationally efficient solution + # self.coords_lookup = _create_coord_index(self.input_segmentation, classes = classes) + # self.coords_lookup = {k: np.array(v) for k, v in self.coords_lookup.items()} + self.coords_lookup = _create_coord_index_sparse(self.input_segmentation) + else: + self.log("Loading coordinates from external source") + self.coords_lookup = coords_lookup + + # try multithreading + if self.processes > 1: + self.multi_threading = True + self.log("Processing cell sets in parallel") + args = [] + for i, cell_set in enumerate(cell_sets): + args.append((i, cell_set)) + + collections = _execute_indexed_parallel( + self.generate_cutting_data, + args=args, + tqdm_args={ + "file": sys.stdout, + "disable": not self.verbose, + "desc": "collecting cell sets", + }, + n_threads=self.processes, + ) + else: + print("Processing cell sets in serial") + self.multi_threading = False + collections = [] + for i, cell_set in enumerate(cell_sets): + collections.append(self.generate_cutting_data(i, cell_set)) + + return reduce(lambda a, b: a.join(b), collections) + + def generate_cutting_data(self, i: int, cell_set: dict) -> Collection: + if 0 in cell_set["classes_loaded"]: + cell_set["classes_loaded"] = cell_set["classes_loaded"][cell_set["classes_loaded"] != 0] + warnings.warn("Class 0 is not a valid class and was removed from the cell set", stacklevel=2) + + self.log("Convert label format into coordinate format") + center, length, coords = get_coordinate_form(cell_set["classes_loaded"], self.coords_lookup) + + self.log("Conversion finished, performing sanity check.") + + # Sanity check 1 + if len(center) == len(cell_set["classes_loaded"]): + pass + else: + self.log( + "Check failed, returned lengths do not match cell set.\n Some classes were not found in the segmentation and were therefore removed.\n Please make sure all classes specified are present in your segmentation." + ) + elements_removed = len(cell_set["classes_loaded"]) - len(center) + self.log(f"{elements_removed} classes were not found and therefore removed.") + + # Sanity check 2: for the returned coordinates + if len(center) == len(length): + pass + else: + self.log( + "Check failed, returned lengths do not match. Please check if all classes specified are present in your segmentation" + ) + + # Sanity check 3 + zero_elements = 0 + for el in coords: + if len(el) == 0: + zero_elements += 1 + + if zero_elements <= 2: # allow at most for 2 zero elements (x = 0 and y = 0) + pass + else: + self.log( + "Check failed, returned coordinates contain empty elements. Please check if all classes specified are present in your segmentation" + ) + + if self.config["join_intersecting"]: + center, length, coords = self.merge_dilated_shapes( + center, length, coords, dilation=self.config["shape_dilation"], erosion=self.config["shape_erosion"] + ) + + # Calculate dilation and erosion based on if merging was activated + dilation = ( + self.config["binary_smoothing"] + if self.config["join_intersecting"] + else self.config["binary_smoothing"] + self.config["shape_dilation"] + ) + erosion = ( + self.config["binary_smoothing"] + if self.config["join_intersecting"] + else self.config["binary_smoothing"] + self.config["shape_erosion"] + ) + + if self.config["threads"] == 1: + shapes = [] + for coord in tqdm(coords, desc="creating shapes"): + shapes.append(transform_to_map(coord, dilation=dilation, erosion=erosion, coord_format=False)) + else: + with mp.get_context(self.context).Pool(processes=self.config["threads"]) as pool: + shapes = list( + tqdm( + pool.imap( + partial( + transform_to_map, + erosion=erosion, + dilation=dilation, + coord_format=False, + ), + coords, + ), + total=len(center), + disable=not self.verbose, + desc="creating shapes", + ) + ) + + if self.config["threads"] == 1: + polygons = [] + for shape in tqdm(shapes, desc="calculating polygons"): + polygons.append( + _create_poly( + shape, + smoothing_filter_size=self.config["convolution_smoothing"], + rdp_epsilon=self.config["rdp_epsilon"], + ) + ) + else: + with mp.get_context(self.context).Pool(processes=self.config["threads"]) as pool: + polygons = list( + tqdm( + pool.imap( + partial( + _create_poly, + smoothing_filter_size=self.config["convolution_smoothing"], + rdp_epsilon=self.config["rdp_epsilon"], + ), + shapes, + ), + total=len(center), + disable=not self.verbose, + desc="calculating polygons", + ) + ) + + # perform path optimization to minimize the total distance that the LMD travels during cutting (this improves cutting speed and focus) + center = np.array(center) + unoptimized_length = calc_len(center) + self.log(f"Current path length: {unoptimized_length:,.2f} units") + + if self.optimization_method != "none": + if self.optimization_method == "greedy": + optimized_idx = tsp_greedy_solve(center, k=self.config["greedy_k"]) + + elif self.optimization_method == "hilbert": + optimized_idx = tsp_hilbert_solve(center, p=self.config["hilbert_p"]) + + # update order of centers + center = center[optimized_idx] + self.indexes = optimized_idx + + # calculate optimized path length and optimization factor + optimized_length = calc_len(center) + self.log(f"Optimized path length: {optimized_length:,.2f} units") + + optimization_factor = unoptimized_length / optimized_length + self.log(f"Optimization factor: {optimization_factor:,.1f}x") + else: + self.log("No path optimization used") + optimization_factor = 1 + optimized_idx = list(range(len(center))) + # order list of shapes by the optimized index array + polygons = [x for _, x in sorted(zip(optimized_idx, polygons))] + + # Plot coordinates if in debug mode + if self.verbose: + fig, axs = plt.subplots(1, 1, figsize=(10, 10)) + + if "background_image" in self.config: + axs.imshow(self.config["background_image"]) + + axs.scatter(center[:, 1], center[:, 0], s=1) + + for shape in polygons: + axs.plot(shape[:, 1], shape[:, 0], color="red", linewidth=1) + + axs.scatter( + self.calibration_points[:, 1], + self.calibration_points[:, 0], + color="blue", + ) + axs.plot(center[:, 1], center[:, 0], color="grey") + axs.invert_yaxis() + axs.set_aspect("equal", adjustable="box") + axs.axis("off") + axs.set_title("Final cutting path") + fig.tight_layout() + + if self.multi_threading: + self.log("Plotting shapes in debug mode is not supported in multi-threading mode.") + self.log("Saving plots to disk instead.") + fig.savefig(f"debug_plot_{i}.png") + plt.close(fig) + else: + plt.show(fig) + + # Generate array of marker cross positions + ds = Collection(calibration_points=self.calibration_points, scale=self.config["xml_decimal_transform"]) + ds.orientation_transform = self.config["orientation_transform"] + + for shape in polygons: + # Check if well key is set in cell set definition + if "well" in cell_set: + ds.new_shape(shape, well=cell_set["well"]) + else: + ds.new_shape(shape) + return ds + + def merge_dilated_shapes(self, input_center, input_length, input_coords, dilation=0, erosion=0): + print("Intersecting Shapes will be merged into a single shape.") + + # initialize all shapes and create dilated coordinates + # coordinates are created as complex numbers to facilitate comparison with np.isin + dilated_coords = [] + + if self.config["threads"] == 1: + for coord in tqdm(input_coords, desc="dilating shapes"): + dilated_coords.append(transform_to_map(coord, dilation=dilation)) + + else: + with mp.get_context(self.context).Pool(processes=self.config["threads"]) as pool: + dilated_coords = list( + tqdm( + pool.imap(partial(transform_to_map, dilation=dilation), input_coords), + total=len(input_center), + desc="dilating shapes", + ) + ) + + dilated_coords = [np.apply_along_axis(lambda args: [complex(*args)], 1, d).flatten() for d in dilated_coords] + + # A sparse distance matrix is calculated for all cells which are closer than distance_heuristic + center_arr = np.array(input_center) + center_tree = cKDTree(center_arr) + + sparse_distance = center_tree.sparse_distance_matrix(center_tree, self.config["distance_heuristic"]) + sparse_distance = scipy.sparse.tril(sparse_distance) + + # sparse intersection matrix is calculated based on the sparse distance matrix + intersect_data = [] + for col, row in zip(sparse_distance.col, sparse_distance.row): + # diagonal entries are known to intersect + if col == row: + intersect_data.append(1) + else: + # np.isin is used with the two complex coordinate arrays + do_intersect = np.isin(dilated_coords[col], dilated_coords[row]).any() + # intersect_data uses the same columns and rows as sparse_distance + # if two shapes intersect, an edge is created otherwise, no edge is created. + # zero entries will be dropped later + intersect_data.append(1 if do_intersect else 0) + + # create sparse intersection matrix and drop zero elements + sparse_intersect = scipy.sparse.coo_matrix((intersect_data, (sparse_distance.row, sparse_distance.col))) + sparse_intersect.eliminate_zeros() + + # create networkx graph from sparse intersection matrix + g = nx.from_scipy_sparse_array(sparse_intersect) + + # find unconnected subgraphs + # to_merge contains a list of lists with indexes pointing to shapes to be merged + to_merge = [list(g.subgraph(c).nodes()) for c in nx.connected_components(g)] + + output_center = [] + output_length = [] + output_coords = [] + + # merge coords + for new_shape in to_merge: + coords = [] + + for idx in new_shape: + coords.append(dilated_coords[idx]) + + coords_complex = np.concatenate(coords) + coords_complex = np.unique(coords_complex) + coords_2d = np.array([coords_complex.real, coords_complex.imag], dtype=int).T + + # calculate properties length and center from coords + new_center = np.mean(coords_2d, axis=0) + new_len = len(coords_2d) + + # append values to output lists + output_center.append(new_center) + output_length.append(new_len) + output_coords.append(coords_2d) + + print( + len(to_merge) - len(output_center), + "shapes that were intersecting were found and merged.", + ) + return output_center, output_length, output_coords + + def check_cell_set_sanity(self, cell_set): + """Check if cell_set dictionary contains the right keys""" + + if "classes" in cell_set: + if not isinstance(cell_set["classes"], (list, str, np.ndarray)): + self.log("No list of classes specified for cell set") + raise TypeError("No list of classes specified for cell set") + else: + self.log("No classes specified for cell set") + raise KeyError("No classes specified for cell set") + + if "well" in cell_set: + if not isinstance(cell_set["well"], str): + self.log("No well of type str specified for cell set") + raise TypeError("No well of type str specified for cell set") + + def load_classes(self, cell_set): + """Identify cell class definition and load classes + + Identify if cell classes are provided as list of integers or as path pointing to a csv file. + Depending on the type of the cell set, the classes are loaded and returned for selection. + """ + if isinstance(cell_set["classes"], list): + return cell_set["classes"] + + if isinstance(cell_set["classes"], np.ndarray): + if np.issubdtype(cell_set["classes"].dtype.type, np.integer): + return cell_set["classes"] + + if isinstance(cell_set["classes"], str): + # If the path is relative, it is interpreted relative to the project directory + if os.path.isabs(cell_set["classes"]): + path = cell_set["classes"] + else: + path = os.path.join(Path(self.directory).parents[0], cell_set["classes"]) + + if os.path.isfile(path): + try: + cr = csv.reader(open(path)) + filtered_classes = np.array([int(el[0]) for el in list(cr)], dtype="int64") + self.log(f"Loaded {len(filtered_classes)} classes from csv") + return filtered_classes + except (ValueError, TypeError) as e: + self.log(f"CSV file could not be converted to list of integers: {path}") + raise ValueError() from e + else: + self.log("Path containing classes could not be read: {path}") + raise ValueError() + else: + self.log( + "classes argument for a cell set needs to be a list of integer ids or a path pointing to a csv of integer ids." + ) + raise TypeError( + "classes argument for a cell set needs to be a list of integer ids or a path pointing to a csv of integer ids." + ) + + def log(self, msg): + if self.verbose: + print(msg) + + def register_parameter(self, key, value): + if isinstance(key, str): + config_handle = self.config + + elif isinstance(key, list): + raise NotImplementedError("registration of parameters is not yet supported for nested parameters") + + else: + raise TypeError("Key musst be of string or a list of strings") + + if key not in config_handle: + self.log(f"No configuration for {key} found, parameter will be set to {value}") + config_handle[key] = value diff --git a/src/lmd/lib/_utils.py b/src/lmd/lib/_utils.py new file mode 100644 index 0000000..ac3d98f --- /dev/null +++ b/src/lmd/lib/_utils.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from concurrent.futures import ProcessPoolExecutor, as_completed +from typing import Callable + +import matplotlib.pyplot as plt +import numpy as np + +# import warnings +from rdp import rdp +from scipy import ndimage +from scipy.signal import convolve2d +from skimage.morphology import binary_erosion, disk +from skimage.morphology import dilation as binary_dilation +from skimage.segmentation import find_boundaries +from tqdm.auto import tqdm + + +# TODO: Rename tqdm_args to tqdm_kwargs +def _execute_indexed_parallel(func: Callable, *, args: list, tqdm_args: dict = None, n_threads: int = 10) -> list: + """parallelization of function call with indexed arguments using ThreadPoolExecutor. Returns a list of results in the order of the input arguments. + + Args: + func (Callable): _description_ + args (list): _description_ + tqdm_args (dict, optional): _description_. Defaults to None. + n_threads (int, optional): _description_. Defaults to 10. + + Returns: + list: containing the results of the function calls in the same order as the input arguments + """ + if tqdm_args is None: + tqdm_args = {"total": len(args)} + elif "total" not in tqdm_args: + tqdm_args["total"] = len(args) + + results = [None for _ in range(len(args))] + with ProcessPoolExecutor(n_threads) as executor: + with tqdm(**tqdm_args) as pbar: + futures = {executor.submit(func, *arg): i for i, arg in enumerate(args)} + for future in as_completed(futures): + index = futures[future] + results[index] = future.result() + pbar.update(1) + + return results + + +def transform_to_map(coords, dilation=0, erosion=0, coord_format=True, debug=False): + # safety boundary which extands the generated map size + safety_offset = 3 + dilation_offset = int(dilation) + + coords = np.array(coords).astype(int) + + # top left offset used for creating the offset map + if coords.size == 0: + raise ValueError("coords array is empty; cannot compute minimum.") + offset = np.min(coords, axis=0) - safety_offset - dilation_offset + mat = np.array([offset, [0, 0]]) + offset = np.max(mat, axis=0) + + offset_coords = coords - offset + offset_coords = offset_coords.astype(np.uint) + + offset_map_size = np.max(offset_coords, axis=0) + 2 * safety_offset + dilation_offset + offset_map_size = offset_map_size.astype(np.uint) + + offset_map = np.zeros(offset_map_size, dtype=np.ubyte) + + y = tuple(offset_coords.T[0]) + x = tuple(offset_coords.T[1]) + + offset_map[(y, x)] = 1 + + if debug: + plt.imshow(offset_map) + plt.show() + + offset_map = binary_dilation(offset_map, footprint=disk(dilation)) + offset_map = binary_erosion(offset_map, footprint=disk(erosion)) + offset_map = ndimage.binary_fill_holes(offset_map).astype(int) + + if debug: + plt.imshow(offset_map) + plt.show() + + # coord_format will return a sparse format of [[2, 1],[1, 2],[0, 2]] + # otherwise will return a dense matrix and the offset [[0, 0, 1],[0, 0, 1],[0, 1, 0]] + + if coord_format: + idx_local = np.argwhere(offset_map == 1) + idx_global = idx_local + offset + return idx_global + else: + return (offset_map, offset) + + +def _create_poly( + in_tuple, + smoothing_filter_size: int = 12, + rdp_epsilon: float = 0, + debug: bool = False, +): + """Converts a list of pixels into a polygon. + Args + smoothing_filter_size (int, default = 12): The smoothing filter is the circular convolution with a vector of length smoothing_filter_size and all elements 1 / smoothing_filter_size. + + rdp_epsilon (float, default = 0 ): When compression is wanted, this specifies the epsilon value for the Ramer-Douglas-Peucker algorithm. Higher values will result in more compression. + + dilation (int, default = 0): Binary dilation used before polygon creation for increasing the mask size. This Dilation ignores potential neighbours. Neighbour aware dilation of segmentation mask needs to be defined during segmentation. + """ + (offset_map, offset) = in_tuple + + # find polygon bounds from mask + bounds = find_boundaries(offset_map, connectivity=1, mode="subpixel", background=0) + + edges = np.array(np.where(bounds == 1)) / 2 + edges = edges.T + edges = _sort_edges(edges) + + # smoothing resulting shape + smk = np.ones((smoothing_filter_size, 1)) / smoothing_filter_size + edges = convolve2d(edges, smk, mode="full", boundary="wrap") + + # compression of the resulting polygon + poly = rdp(edges, epsilon=rdp_epsilon) # Ramer-Douglas-Peucker algorithm for polygon simplification + + # debuging + """ + print(self.poly.shape) + fig = plt.figure(frameon=False) + fig.set_size_inches(10,10) + ax = plt.Axes(fig, [0., 0., 1., 1.]) + ax.set_axis_off() + fig.add_axes(ax) + ax.imshow(bounds) + ax.plot(edges[:,1]*2,edges[:,0]*2) + ax.plot(self.poly[:,1]*2,self.poly[:,0]*2) + """ + + return poly + offset + + +def _sort_edges(edges): + """Sorts the vertices of the polygon. + Greedy sorting is performed, might have difficulties with complex shapes. + + """ + + it = len(edges) + new = [] + new.append(edges[0]) + + edges = np.delete(edges, 0, 0) + + for i in range(1, it): + old = np.array(new[i - 1]) + + dist = np.linalg.norm(edges - old, axis=1) + + min_index = np.argmin(dist) + new.append(edges[min_index]) + edges = np.delete(edges, min_index, 0) + + return np.array(new) From df33ecf809a3a0b6402805b3145a546047b0519e Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 22:48:07 +0100 Subject: [PATCH 03/44] [API] Expose _utils functions --- src/lmd/lib/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lmd/lib/__init__.py b/src/lmd/lib/__init__.py index d2c4763..55077dd 100644 --- a/src/lmd/lib/__init__.py +++ b/src/lmd/lib/__init__.py @@ -2,3 +2,4 @@ from ._geom import Collection, Shape from ._segmentation import SegmentationLoader +from ._utils import _create_poly, _execute_indexed_parallel, _sort_edges From c084530e32c88acfbe5b73982807a7cc00ef21bd Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 23:05:11 +0100 Subject: [PATCH 04/44] [Rev] Only export public methods --- src/lmd/lib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmd/lib/__init__.py b/src/lmd/lib/__init__.py index 55077dd..d559e4b 100644 --- a/src/lmd/lib/__init__.py +++ b/src/lmd/lib/__init__.py @@ -2,4 +2,4 @@ from ._geom import Collection, Shape from ._segmentation import SegmentationLoader -from ._utils import _create_poly, _execute_indexed_parallel, _sort_edges +from ._utils import transform_to_map From c67411536703c146789a493da7e94b64fde68f78 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 23:07:00 +0100 Subject: [PATCH 05/44] [Rev-Rev] Export all methods, including private methods to maintain API --- src/lmd/lib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmd/lib/__init__.py b/src/lmd/lib/__init__.py index d559e4b..a257c14 100644 --- a/src/lmd/lib/__init__.py +++ b/src/lmd/lib/__init__.py @@ -2,4 +2,4 @@ from ._geom import Collection, Shape from ._segmentation import SegmentationLoader -from ._utils import transform_to_map +from ._utils import _create_poly, _execute_indexed_parallel, _sort_edges, transform_to_map From 517cbba8cb671c3ab6e42a93af6e094aaabe601f Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 23:07:52 +0100 Subject: [PATCH 06/44] [Test] Mirror changes in package structure and split tests into modules --- tests/unit/test_lmd/test_lib/test__geom.py | 334 ++++++++++++++++++ .../unit/test_lmd/test_lib/test_lib_utils.py | 92 +++++ 2 files changed, 426 insertions(+) create mode 100644 tests/unit/test_lmd/test_lib/test__geom.py create mode 100644 tests/unit/test_lmd/test_lib/test_lib_utils.py diff --git a/tests/unit/test_lmd/test_lib/test__geom.py b/tests/unit/test_lmd/test_lib/test__geom.py new file mode 100644 index 0000000..14bc977 --- /dev/null +++ b/tests/unit/test_lmd/test_lib/test__geom.py @@ -0,0 +1,334 @@ +import os + +import geopandas as gpd +import numpy as np +import pytest +import shapely +from lxml import etree as ET + +from lmd.lib import Collection, Shape + + +@pytest.fixture +def shape_xml(): + """Shape XML""" + # Define shape in xml + return """ + + 3 + A1 + this is a test + 1 + 3.1415 + 0 + -0 + 0 + -1 + 1 + -0 + + """.strip() + + +@pytest.fixture +def incorrect_shape_xml(): + """Shape XML""" + # Define shape in xml + return """ + + 2 + A1 + this is a test + 1 + 3.1415 + 0 + -0 + 0 + -1 + + """.strip() + + +@pytest.fixture +def collection_xml(tmpdir, shape_xml): + """Shape XML""" + # Define shape in xml + collection = f""" + + + 1 + 0 + 0 + 0 + 10000 + 5000 + 5000 + 1 + {shape_xml} + + """.strip() + + tmpfile = os.path.join(tmpdir, "test.xml") + with open(tmpfile, "w") as f: + f.write(collection) + yield tmpfile + os.remove(tmpfile) + + +@pytest.fixture +def incorrect_collection_xml(tmpdir, incorrect_shape_xml): + """Shape XML""" + # Define shape in xml + collection = f""" + + + 1 + 0 + 0 + 0 + 10000 + 5000 + 5000 + 1 + {incorrect_shape_xml} + + """.strip() + + tmpfile = os.path.join(tmpdir, "test.xml") + + with open(tmpfile, "w") as f: + f.write(collection) + yield tmpfile + os.remove(tmpfile) + + +def test_collection() -> None: + calibration = np.array([[0, 0], [0, 100], [50, 50]]) + Collection(calibration_points=calibration) + + +def test_collection_load(collection_xml) -> None: + """Test collection loading from xml""" + collection = Collection() + collection.load(collection_xml) + + +def test_collection_invalid_shapes_raise(incorrect_collection_xml) -> None: + """Test collection loading from xml with incorrect shapes""" + collection = Collection() + with pytest.raises(ValueError): + collection.load(incorrect_collection_xml, raise_shape_errors=True) + + +def test_collection_invalid_shapes_warn(incorrect_collection_xml) -> None: + """Test collection loading from xml with incorrect shapes""" + collection = Collection() + with pytest.warns(): + collection.load(incorrect_collection_xml, raise_shape_errors=False) + + +def test_shape(): + rectangle_coordinates = np.array([[10, 10], [40, 10], [40, 40], [10, 40], [10, 10]]) + Shape(rectangle_coordinates) + + +@pytest.mark.parametrize( + ("invalid_shape", "error_message"), + [ + # 2 points + ( + np.array([[0, 0], [0, 1]]), + "Valid shape must contain at least 3 points, but only contains 2", + ), + # Additional dimension + ( + np.zeros(shape=(2, 2, 2)), + "Shape dimensionality is not valid", + ), + # 3d points + ( + np.zeros(shape=(5, 3)), + "Shape dimensionality is not valid", + ), + # 3d points and too few points - covered by dimensionality validation + ( + np.zeros(shape=(2, 3)), + "Shape dimensionality is not valid", + ), + ], +) +def test_shape_invalid_shapes(invalid_shape, error_message): + with pytest.raises(ValueError, match=error_message): + Shape(points=invalid_shape) + + +def test_shape_from_xml(shape_xml): + """Read a minimal xml representation of a cell shape and associated metadata""" + # Load xml with Shape + shape_xml = ET.fromstring(bytes(shape_xml, encoding="utf-8")) + shape = Shape.from_xml(shape_xml) + assert (shape.points == np.array([[0, 0], [0, -1], [1, 0]])).all() + assert shape.well == "A1" + assert shape.custom_attributes["TEST"] == "this is a test" + assert shape.custom_attributes["test2"] == "1" + assert shape.custom_attributes["test3"] == "3.1415" + + +def test_plotting(): + calibration = np.array([[0, 0], [0, 100], [50, 50]]) + my_first_collection = Collection(calibration_points=calibration) + + # create a custom rectangle + + rectangle_coordinates = np.array([[10, 10], [40, 10], [40, 40], [10, 40], [10, 10]]) + rectangle = Shape(rectangle_coordinates) + + my_first_collection.add_shape(rectangle) + + my_first_collection.plot(calibration=True) + + triangle_coordinates = np.array([[10, 70], [40, 70], [40, 100], [10, 70]]) + my_first_collection.new_shape(triangle_coordinates) + + my_first_collection.plot(calibration=True) + + +@pytest.fixture +def geopandas_collection(): + """Geopandas shape collection with both controlled (name, well) and custom metadata""" + return gpd.GeoDataFrame( + data={"well": ["A1"], "name": "my_shape", "string_attribute": "a"}, + geometry=[shapely.Polygon([[0, 0], [0, 1], [1, 0], [0, 0]])], + ) + + +@pytest.mark.parametrize( + ("well_column", "name_column", "custom_attributes"), + [ + ("well", None, None), + (None, "well", None), + (None, None, "string_attribute"), + ("well", "name", None), + ("well", "name", "string_attribute"), + ], +) +def test_collection_load_geopandas( + geopandas_collection: gpd.GeoDataFrame, + well_column: str, + name_column: str, + custom_attributes: list[str], +) -> None: + # Export well metadata + c = Collection(calibration_points=np.array([[-1, -1], [1, 1], [0, 1]])) + calibration_points_old = c.calibration_points + c.load_geopandas( + geopandas_collection, + well_column=well_column, + name_column=name_column, + custom_attribute_columns=custom_attributes, + ) + + all_columns = [col for col in (well_column, custom_attributes) if col is not None] + + assert c.to_geopandas(*all_columns).equals(geopandas_collection[[*all_columns, "geometry"]]) # type: ignore # mixed-type unpacking; safe by inspection + assert (c.calibration_points == calibration_points_old).all() + + # Overwrite calibration points + c = Collection(calibration_points=np.array([[-1, -1], [1, 1], [0, 1]])) + calibration_points_new = np.array([[0, 0], [100, 0], [0, 100]]) + + c.load_geopandas( + geopandas_collection, + calibration_points=calibration_points_new, + well_column=well_column, + name_column=name_column, + custom_attribute_columns=custom_attributes, + ) + assert c.to_geopandas(*all_columns).equals(geopandas_collection[[*all_columns, "geometry"]]) # type: ignore # mixed-type unpacking; safe by inspection + assert (c.calibration_points == calibration_points_new).all() + + # Do not export well metadata + c = Collection(calibration_points=np.array([[-1, -1], [1, 1], [0, 1]])) + c.load_geopandas(geopandas_collection) + assert c.to_geopandas().equals(geopandas_collection[["geometry"]]) + + +def test_collection_save(): + calibration = np.array([[0, 0], [0, 100], [50, 50]]) + my_first_collection = Collection(calibration_points=calibration) + + # create a custom rectangle + + rectangle_coordinates = np.array([[10, 10], [40, 10], [40, 40], [10, 40], [10, 10]]) + rectangle = Shape(rectangle_coordinates) + + my_first_collection.add_shape(rectangle) + + my_first_collection.save("first_collection.xml") + + +@pytest.fixture +def collection1() -> Collection: + """Collection with identity orientation transform and one shape""" + calibration = np.array([[0, 0], [0, 100], [50, 50]]) + collection = Collection(calibration_points=calibration, orientation_transform=np.eye(2)) + collection.add_shape(Shape(np.array([[0, 0], [1, 0], [1, 1], [0, 0]]))) + return collection + + +@pytest.fixture +def collection2_same_orientation_transform() -> Collection: + """Collection with same orientation transform as collection1""" + calibration = np.array([[0, 0], [0, 100], [50, 50]]) + collection = Collection(calibration_points=calibration, orientation_transform=np.eye(2)) + collection.add_shape(Shape(np.array([[2, 2], [3, 2], [3, 3], [2, 2]]))) + return collection + + +@pytest.fixture +def collection2_different_orientation_transform() -> Collection: + """Collection with different orientation transform (90 degree rotation)""" + calibration = np.array([[0, 0], [0, 100], [50, 50]]) + orientation_transform = np.array([[0, -1], [1, 0]]) + collection = Collection(calibration_points=calibration, orientation_transform=orientation_transform) + collection.add_shape(Shape(np.array([[2, 2], [3, 2], [3, 3], [2, 2]]))) + return collection + + +def test_collection_join_same_orientation_transform( + collection1: Collection, collection2_same_orientation_transform: Collection +) -> None: + """Test joining collections with the same orientation_transform""" + original_shape = collection1.shapes[0].points.copy() + joined_shape = collection2_same_orientation_transform.shapes[0].points.copy() + + collection1.join(collection2_same_orientation_transform) + + assert len(collection1.shapes) == 2 + assert np.array_equal(collection1.shapes[0].points, original_shape) + assert np.array_equal(collection1.shapes[1].points, joined_shape) + + +def test_collection_join_different_orientation_transform_update( + collection1: Collection, collection2_different_orientation_transform: Collection +) -> None: + """Test joining collections with different orientation_transforms and update_orientation_transform=True""" + original_shape = collection1.shapes[0].points.copy() + joined_shape = collection2_different_orientation_transform.shapes[0].points.copy() + + collection1.join(collection2_different_orientation_transform, update_orientation_transform=True) + + assert len(collection1.shapes) == 2 + assert np.array_equal(collection1.shapes[0].points, original_shape) + assert np.array_equal(collection1.shapes[1].points, joined_shape) + assert np.array_equal(collection1.shapes[1].orientation_transform, collection1.orientation_transform) + + +def test_collection_join_different_orientation_transform_no_update_warns( + collection1: Collection, collection2_different_orientation_transform: Collection +) -> None: + """Test joining collections with different orientation_transforms and update_orientation_transform=False warns""" + with pytest.warns(UserWarning): + collection1.join(collection2_different_orientation_transform, update_orientation_transform=False) + + assert len(collection1.shapes) == 2 diff --git a/tests/unit/test_lmd/test_lib/test_lib_utils.py b/tests/unit/test_lmd/test_lib/test_lib_utils.py new file mode 100644 index 0000000..2c7109c --- /dev/null +++ b/tests/unit/test_lmd/test_lib/test_lib_utils.py @@ -0,0 +1,92 @@ +import operator +from typing import Optional + +import numpy as np +import pytest + +from lmd.lib import _create_poly, _execute_indexed_parallel, _sort_edges + + +@pytest.mark.parametrize("tqdm_args", [None, {}]) +def test__execute_indexed_parallel(tqdm_args: Optional[dict]) -> None: + """Test parallelized execution""" + values = range(10) + result = _execute_indexed_parallel(operator.mul, args=[[i, 2] for i in values], tqdm_args=tqdm_args, n_threads=2) + + assert result == [2 * value for value in values] + + +@pytest.fixture +def square_mask() -> np.ndarray: + """Create a simple 10x10 square mask""" + mask = np.zeros((20, 20), dtype=np.uint8) + mask[5:15, 5:15] = 1 + return mask + + +def test__create_poly_returns_polygon(square_mask: np.ndarray) -> None: + """Test that _create_poly returns a polygon from a binary mask""" + offset = np.array([0, 0]) + in_tuple = (square_mask, offset) + + result = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=0) + + assert isinstance(result, np.ndarray) + assert result.ndim == 2 + assert result.shape[1] == 2 # Each point has x, y coordinates + assert len(result) >= 4 # At least 4 points for a quadrilateral + + +def test__create_poly_applies_offset(square_mask: np.ndarray) -> None: + """Test that _create_poly correctly applies the offset""" + offset = np.array([100, 200]) + in_tuple = (square_mask, offset) + + result = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=0) + + assert np.all(result[:, 0] >= offset[0]) + assert np.all(result[:, 1] >= offset[1]) + + +def test__create_poly_rdp_epsilon_reduces_points(square_mask: np.ndarray) -> None: + """Test that higher rdp_epsilon results in fewer polygon points""" + offset = np.array([0, 0]) + in_tuple = (square_mask, offset) + + result_no_compression = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=0) + result_with_compression = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=1.0) + + assert len(result_with_compression) <= len(result_no_compression) + + +@pytest.mark.parametrize( + ("edges", "expected_result"), + [ + # Already sorted - nearest neighbor from each point is the next one + ( + np.array([[0, 0], [1, 0], [2, 0]]), + np.array([[0, 0], [1, 0], [2, 0]]), + ), + # Reversed order - greedy sort starts from first element + ( + np.array([[0, 0], [2, 0], [1, 0]]), + np.array([[0, 0], [1, 0], [2, 0]]), + ), + # Square vertices in scrambled order + ( + np.array([[0, 0], [1, 1], [1, 0], [0, 1]]), + np.array([[0, 0], [1, 0], [1, 1], [0, 1]]), + ), + # Two points - minimal case + ( + np.array([[0, 0], [1, 1]]), + np.array([[0, 0], [1, 1]]), + ), + ], + ids=("already_sorted", "reversed_middle", "square_scrambled", "minimal"), +) +def test__sort_edges(edges: np.ndarray, expected_result: np.ndarray) -> None: + """Test `_sort_edges` for greedy nearest-neighbor sorting of polygon vertices""" + result = _sort_edges(edges) + + assert np.array_equal(result, expected_result) From 08c264413ee92f9548994e482d0960eb71411359 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 23:10:49 +0100 Subject: [PATCH 07/44] [Test] Delete old monolithic test file --- tests/unit/test_lmd/test_lib.py | 421 -------------------------------- 1 file changed, 421 deletions(-) delete mode 100644 tests/unit/test_lmd/test_lib.py diff --git a/tests/unit/test_lmd/test_lib.py b/tests/unit/test_lmd/test_lib.py deleted file mode 100644 index 6cffe96..0000000 --- a/tests/unit/test_lmd/test_lib.py +++ /dev/null @@ -1,421 +0,0 @@ -import operator -import os -from typing import Optional - -import geopandas as gpd -import numpy as np -import pytest -import shapely -from lxml import etree as ET - -from lmd.lib import Collection, Shape, _create_poly, _execute_indexed_parallel, _sort_edges - - -@pytest.fixture -def shape_xml(): - """Shape XML""" - # Define shape in xml - return """ - - 3 - A1 - this is a test - 1 - 3.1415 - 0 - -0 - 0 - -1 - 1 - -0 - - """.strip() - - -@pytest.fixture -def incorrect_shape_xml(): - """Shape XML""" - # Define shape in xml - return """ - - 2 - A1 - this is a test - 1 - 3.1415 - 0 - -0 - 0 - -1 - - """.strip() - - -@pytest.fixture -def collection_xml(tmpdir, shape_xml): - """Shape XML""" - # Define shape in xml - collection = f""" - - - 1 - 0 - 0 - 0 - 10000 - 5000 - 5000 - 1 - {shape_xml} - - """.strip() - - tmpfile = os.path.join(tmpdir, "test.xml") - with open(tmpfile, "w") as f: - f.write(collection) - yield tmpfile - os.remove(tmpfile) - - -@pytest.fixture -def incorrect_collection_xml(tmpdir, incorrect_shape_xml): - """Shape XML""" - # Define shape in xml - collection = f""" - - - 1 - 0 - 0 - 0 - 10000 - 5000 - 5000 - 1 - {incorrect_shape_xml} - - """.strip() - - tmpfile = os.path.join(tmpdir, "test.xml") - - with open(tmpfile, "w") as f: - f.write(collection) - yield tmpfile - os.remove(tmpfile) - - -@pytest.mark.parametrize("tqdm_args", [None, {}]) -def test__execute_indexed_parallel(tqdm_args: Optional[dict]) -> None: - """Test parallelized execution""" - values = range(10) - result = _execute_indexed_parallel(operator.mul, args=[[i, 2] for i in values], tqdm_args=tqdm_args, n_threads=2) - - assert result == [2 * value for value in values] - - -@pytest.mark.parametrize( - ("edges", "expected_result"), - [ - # Already sorted - nearest neighbor from each point is the next one - ( - np.array([[0, 0], [1, 0], [2, 0]]), - np.array([[0, 0], [1, 0], [2, 0]]), - ), - # Reversed order - greedy sort starts from first element - ( - np.array([[0, 0], [2, 0], [1, 0]]), - np.array([[0, 0], [1, 0], [2, 0]]), - ), - # Square vertices in scrambled order - ( - np.array([[0, 0], [1, 1], [1, 0], [0, 1]]), - np.array([[0, 0], [1, 0], [1, 1], [0, 1]]), - ), - # Two points - minimal case - ( - np.array([[0, 0], [1, 1]]), - np.array([[0, 0], [1, 1]]), - ), - ], - ids=("already_sorted", "reversed_middle", "square_scrambled", "minimal"), -) -def test__sort_edges(edges: np.ndarray, expected_result: np.ndarray) -> None: - """Test `_sort_edges` for greedy nearest-neighbor sorting of polygon vertices""" - result = _sort_edges(edges) - - assert np.array_equal(result, expected_result) - - -def test_collection() -> None: - calibration = np.array([[0, 0], [0, 100], [50, 50]]) - Collection(calibration_points=calibration) - - -def test_collection_load(collection_xml) -> None: - """Test collection loading from xml""" - collection = Collection() - collection.load(collection_xml) - - -def test_collection_invalid_shapes_raise(incorrect_collection_xml) -> None: - """Test collection loading from xml with incorrect shapes""" - collection = Collection() - with pytest.raises(ValueError): - collection.load(incorrect_collection_xml, raise_shape_errors=True) - - -def test_collection_invalid_shapes_warn(incorrect_collection_xml) -> None: - """Test collection loading from xml with incorrect shapes""" - collection = Collection() - with pytest.warns(): - collection.load(incorrect_collection_xml, raise_shape_errors=False) - - -def test_shape(): - rectangle_coordinates = np.array([[10, 10], [40, 10], [40, 40], [10, 40], [10, 10]]) - Shape(rectangle_coordinates) - - -@pytest.mark.parametrize( - ("invalid_shape", "error_message"), - [ - # 2 points - ( - np.array([[0, 0], [0, 1]]), - "Valid shape must contain at least 3 points, but only contains 2", - ), - # Additional dimension - ( - np.zeros(shape=(2, 2, 2)), - "Shape dimensionality is not valid", - ), - # 3d points - ( - np.zeros(shape=(5, 3)), - "Shape dimensionality is not valid", - ), - # 3d points and too few points - covered by dimensionality validation - ( - np.zeros(shape=(2, 3)), - "Shape dimensionality is not valid", - ), - ], -) -def test_shape_invalid_shapes(invalid_shape, error_message): - with pytest.raises(ValueError, match=error_message): - Shape(points=invalid_shape) - - -def test_shape_from_xml(shape_xml): - """Read a minimal xml representation of a cell shape and associated metadata""" - # Load xml with Shape - shape_xml = ET.fromstring(bytes(shape_xml, encoding="utf-8")) - shape = Shape.from_xml(shape_xml) - assert (shape.points == np.array([[0, 0], [0, -1], [1, 0]])).all() - assert shape.well == "A1" - assert shape.custom_attributes["TEST"] == "this is a test" - assert shape.custom_attributes["test2"] == "1" - assert shape.custom_attributes["test3"] == "3.1415" - - -def test_plotting(): - calibration = np.array([[0, 0], [0, 100], [50, 50]]) - my_first_collection = Collection(calibration_points=calibration) - - # create a custom rectangle - - rectangle_coordinates = np.array([[10, 10], [40, 10], [40, 40], [10, 40], [10, 10]]) - rectangle = Shape(rectangle_coordinates) - - my_first_collection.add_shape(rectangle) - - my_first_collection.plot(calibration=True) - - triangle_coordinates = np.array([[10, 70], [40, 70], [40, 100], [10, 70]]) - my_first_collection.new_shape(triangle_coordinates) - - my_first_collection.plot(calibration=True) - - -@pytest.fixture -def geopandas_collection(): - """Geopandas shape collection with both controlled (name, well) and custom metadata""" - return gpd.GeoDataFrame( - data={"well": ["A1"], "name": "my_shape", "string_attribute": "a"}, - geometry=[shapely.Polygon([[0, 0], [0, 1], [1, 0], [0, 0]])], - ) - - -@pytest.mark.parametrize( - ("well_column", "name_column", "custom_attributes"), - [ - ("well", None, None), - (None, "well", None), - (None, None, "string_attribute"), - ("well", "name", None), - ("well", "name", "string_attribute"), - ], -) -def test_collection_load_geopandas( - geopandas_collection: gpd.GeoDataFrame, - well_column: str, - name_column: str, - custom_attributes: list[str], -) -> None: - # Export well metadata - c = Collection(calibration_points=np.array([[-1, -1], [1, 1], [0, 1]])) - calibration_points_old = c.calibration_points - c.load_geopandas( - geopandas_collection, - well_column=well_column, - name_column=name_column, - custom_attribute_columns=custom_attributes, - ) - - all_columns = [col for col in (well_column, custom_attributes) if col is not None] - - assert c.to_geopandas(*all_columns).equals(geopandas_collection[[*all_columns, "geometry"]]) # type: ignore # mixed-type unpacking; safe by inspection - assert (c.calibration_points == calibration_points_old).all() - - # Overwrite calibration points - c = Collection(calibration_points=np.array([[-1, -1], [1, 1], [0, 1]])) - calibration_points_new = np.array([[0, 0], [100, 0], [0, 100]]) - - c.load_geopandas( - geopandas_collection, - calibration_points=calibration_points_new, - well_column=well_column, - name_column=name_column, - custom_attribute_columns=custom_attributes, - ) - assert c.to_geopandas(*all_columns).equals(geopandas_collection[[*all_columns, "geometry"]]) # type: ignore # mixed-type unpacking; safe by inspection - assert (c.calibration_points == calibration_points_new).all() - - # Do not export well metadata - c = Collection(calibration_points=np.array([[-1, -1], [1, 1], [0, 1]])) - c.load_geopandas(geopandas_collection) - assert c.to_geopandas().equals(geopandas_collection[["geometry"]]) - - -def test_collection_save(): - calibration = np.array([[0, 0], [0, 100], [50, 50]]) - my_first_collection = Collection(calibration_points=calibration) - - # create a custom rectangle - - rectangle_coordinates = np.array([[10, 10], [40, 10], [40, 40], [10, 40], [10, 10]]) - rectangle = Shape(rectangle_coordinates) - - my_first_collection.add_shape(rectangle) - - my_first_collection.save("first_collection.xml") - - -@pytest.fixture -def collection1() -> Collection: - """Collection with identity orientation transform and one shape""" - calibration = np.array([[0, 0], [0, 100], [50, 50]]) - collection = Collection(calibration_points=calibration, orientation_transform=np.eye(2)) - collection.add_shape(Shape(np.array([[0, 0], [1, 0], [1, 1], [0, 0]]))) - return collection - - -@pytest.fixture -def collection2_same_orientation_transform() -> Collection: - """Collection with same orientation transform as collection1""" - calibration = np.array([[0, 0], [0, 100], [50, 50]]) - collection = Collection(calibration_points=calibration, orientation_transform=np.eye(2)) - collection.add_shape(Shape(np.array([[2, 2], [3, 2], [3, 3], [2, 2]]))) - return collection - - -@pytest.fixture -def collection2_different_orientation_transform() -> Collection: - """Collection with different orientation transform (90 degree rotation)""" - calibration = np.array([[0, 0], [0, 100], [50, 50]]) - orientation_transform = np.array([[0, -1], [1, 0]]) - collection = Collection(calibration_points=calibration, orientation_transform=orientation_transform) - collection.add_shape(Shape(np.array([[2, 2], [3, 2], [3, 3], [2, 2]]))) - return collection - - -def test_collection_join_same_orientation_transform( - collection1: Collection, collection2_same_orientation_transform: Collection -) -> None: - """Test joining collections with the same orientation_transform""" - original_shape = collection1.shapes[0].points.copy() - joined_shape = collection2_same_orientation_transform.shapes[0].points.copy() - - collection1.join(collection2_same_orientation_transform) - - assert len(collection1.shapes) == 2 - assert np.array_equal(collection1.shapes[0].points, original_shape) - assert np.array_equal(collection1.shapes[1].points, joined_shape) - - -def test_collection_join_different_orientation_transform_update( - collection1: Collection, collection2_different_orientation_transform: Collection -) -> None: - """Test joining collections with different orientation_transforms and update_orientation_transform=True""" - original_shape = collection1.shapes[0].points.copy() - joined_shape = collection2_different_orientation_transform.shapes[0].points.copy() - - collection1.join(collection2_different_orientation_transform, update_orientation_transform=True) - - assert len(collection1.shapes) == 2 - assert np.array_equal(collection1.shapes[0].points, original_shape) - assert np.array_equal(collection1.shapes[1].points, joined_shape) - assert np.array_equal(collection1.shapes[1].orientation_transform, collection1.orientation_transform) - - -def test_collection_join_different_orientation_transform_no_update_warns( - collection1: Collection, collection2_different_orientation_transform: Collection -) -> None: - """Test joining collections with different orientation_transforms and update_orientation_transform=False warns""" - with pytest.warns(UserWarning): - collection1.join(collection2_different_orientation_transform, update_orientation_transform=False) - - assert len(collection1.shapes) == 2 - - -@pytest.fixture -def square_mask() -> np.ndarray: - """Create a simple 10x10 square mask""" - mask = np.zeros((20, 20), dtype=np.uint8) - mask[5:15, 5:15] = 1 - return mask - - -def test__create_poly_returns_polygon(square_mask: np.ndarray) -> None: - """Test that _create_poly returns a polygon from a binary mask""" - offset = np.array([0, 0]) - in_tuple = (square_mask, offset) - - result = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=0) - - assert isinstance(result, np.ndarray) - assert result.ndim == 2 - assert result.shape[1] == 2 # Each point has x, y coordinates - assert len(result) >= 4 # At least 4 points for a quadrilateral - - -def test__create_poly_applies_offset(square_mask: np.ndarray) -> None: - """Test that _create_poly correctly applies the offset""" - offset = np.array([100, 200]) - in_tuple = (square_mask, offset) - - result = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=0) - - assert np.all(result[:, 0] >= offset[0]) - assert np.all(result[:, 1] >= offset[1]) - - -def test__create_poly_rdp_epsilon_reduces_points(square_mask: np.ndarray) -> None: - """Test that higher rdp_epsilon results in fewer polygon points""" - offset = np.array([0, 0]) - in_tuple = (square_mask, offset) - - result_no_compression = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=0) - result_with_compression = _create_poly(in_tuple, smoothing_filter_size=3, rdp_epsilon=1.0) - - assert len(result_with_compression) <= len(result_no_compression) From be0322534b30fd8d008a6111d15ad1c0fdf2d299 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Wed, 14 Jan 2026 23:48:43 +0100 Subject: [PATCH 08/44] [Chore] Add todo statements --- src/lmd/lib/_utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lmd/lib/_utils.py b/src/lmd/lib/_utils.py index ac3d98f..788d599 100644 --- a/src/lmd/lib/_utils.py +++ b/src/lmd/lib/_utils.py @@ -46,6 +46,9 @@ def _execute_indexed_parallel(func: Callable, *, args: list, tqdm_args: dict = N return results +# TODO: Remove debug argument [Breaking] +# TODO: Add type hints +# TODO: Add docstring to public method def transform_to_map(coords, dilation=0, erosion=0, coord_format=True, debug=False): # safety boundary which extands the generated map size safety_offset = 3 @@ -96,6 +99,7 @@ def transform_to_map(coords, dilation=0, erosion=0, coord_format=True, debug=Fal return (offset_map, offset) +# TODO: Remove debugging logic def _create_poly( in_tuple, smoothing_filter_size: int = 12, @@ -144,10 +148,9 @@ def _create_poly( def _sort_edges(edges): """Sorts the vertices of the polygon. - Greedy sorting is performed, might have difficulties with complex shapes. + Greedy sorting is performed, might have difficulties with complex shapes. """ - it = len(edges) new = [] new.append(edges[0]) From 0c70dc6c680c18e36675d45b638d4afebb55dd4c Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:30:06 +0100 Subject: [PATCH 09/44] [API] Update imports in segmenation module --- src/lmd/lib/_segmentation.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index fb29d30..c6816ff 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -18,13 +18,8 @@ from scipy.spatial import cKDTree from tqdm.auto import tqdm -from lmd.segmentation import ( - _create_coord_index_sparse, - calc_len, - get_coordinate_form, - tsp_greedy_solve, - tsp_hilbert_solve, -) +from lmd.path import calc_len, tsp_greedy_solve, tsp_hilbert_solve +from lmd.segmentation import _create_coord_index_sparse, get_coordinate_form from ._geom import Collection from ._utils import _create_poly, _execute_indexed_parallel, transform_to_map From 1ee361e18dd1039993c09dca624d746ede62cb8c Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:30:43 +0100 Subject: [PATCH 10/44] [Refactor] Move path-optimization-related functions to dedicated module --- src/lmd/path.py | 185 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/lmd/path.py diff --git a/src/lmd/path.py b/src/lmd/path.py new file mode 100644 index 0000000..bbd8d0b --- /dev/null +++ b/src/lmd/path.py @@ -0,0 +1,185 @@ +"""Path optimization utilities for LMD cutting path generation. + +This module provides algorithms for solving the Traveling Salesman Problem (TSP) +to optimize the order of cutting shapes, minimizing total travel distance for +laser microdissection operations. + +Functions: + tsp_hilbert_solve: Hilbert curve-based TSP approximation + tsp_greedy_solve: Greedy k-NN based TSP approximation + calc_len: Calculate total path length from coordinates +""" + +import numpy as np +from hilbertcurve.hilbertcurve import HilbertCurve +from numba import njit + + +def calc_len(data): + """calculate the length of a path based on a list of coordinates + + Args: + data (np.array): Array of shape `(N, 2)` containing a list of coordinates + + """ + + index = np.arange(len(data)).astype(int) + + not_shifted = data[index[:-1]] + shifted = data[index[1:]] + + diff = not_shifted - shifted + sq = np.square(diff) + dist = np.sum(np.sqrt(np.sum(sq, axis=1))) + + return dist + + +@njit() +def assign_vertices(hilbert_points, data_rounded): + data_rounded = data_rounded.astype(np.int64) + hilbert_points = hilbert_points.astype(np.int64) + + output_order = np.zeros(len(data_rounded)).astype(np.int64) + current_index = 0 + + for hilbert_point in hilbert_points: + for i, data_point in enumerate(data_rounded): + if np.array_equal(hilbert_point, data_point): + output_order[current_index] = i + current_index += 1 + + return output_order + + +def tsp_hilbert_solve(data, p=3): + p = p + n = 2 + max_n = 2 ** (p * n) + hilbert_curve = HilbertCurve(p, n) + distances = list(range(max_n)) + hilbert_points = hilbert_curve.points_from_distances(distances) + hilbert_points = np.array(hilbert_points) + + data_min = np.min(data, axis=0) + data_max = np.max(data, axis=0) + + hilbert_min = np.min(hilbert_points, axis=0) + hilbert_max = np.max(hilbert_points, axis=0) + + data_scaled = data - data_min + data_scaled = data_scaled / (data_max - data_min) * (hilbert_max - hilbert_min) + + data_rounded = np.round(data_scaled).astype(int) + + order = assign_vertices(hilbert_points, data_rounded) + + return order + + +# TODO: Remove unused argument `world_size` +# TODO: Add type hints +# TODO: Add docstrings +# return the first element not present in a list +def _get_closest(used, choices, world_size): + for element in choices: + if element not in used: + # knn matrix contains -1 if the number of elements is smaller than k + if element == -1: + return None + else: + return element + + return None + # all choices have been taken, return closest free index due to local optimality + + +# TODO: Rename to TSP (traveling sales person) +# TODO: Add type hints +# TODO: Add umap as optional dependency +def _tps_greedy_solve(data, k=100): + samples = len(data) + + print(f"{samples} nodes left") + # recursive abort + if samples == 1: + return data + + import umap + + knn_index, knn_dist, _ = umap.umap_.nearest_neighbors( + data, n_neighbors=k, metric="euclidean", metric_kwds={}, angular=True, random_state=np.random.RandomState(42) + ) + + knn_index = knn_index[:, 1:] + knn_dist = knn_dist[:, 1:] + + # follow greedy knn as long as a nearest neighbour is found in the current tree + nodes = [] + current_node = 0 + while current_node is not None: + nodes.append(current_node) + # print(current_node, knn_index[current_node], next_node) + next_node = _get_closest(nodes, knn_index[current_node], samples) + + current_node = next_node + + # as soon as no nearest neigbour can be found, create a new list of all elements still remeining + # nodes: [0, 2, 5], nodes_left: [1, 3, 4, 6, 7, 8, 9] + # add the last node assigned as starting point to the new list + # nodes: [0, 2], nodes_left: [5, 1, 3, 4, 6, 7, 8, 9] + + nodes_left = list(set(range(samples)) - set(nodes)) + + # add last node from nodes to nodes_left + + nodes_left = [nodes.pop(-1)] + nodes_left + + node_data_left = data[nodes_left] + + # join lists + + return np.concatenate([data[nodes], _tps_greedy_solve(node_data_left, k=k)]) + + +# TODO: Add type hints +# TODO: Add docstrings +# calculate the index array for a sorted 2d list based on an unsorted list +@njit() +def _get_nodes(data, sorted_data): + indexed_data = list(enumerate(data)) + + nodes = [] + + print("start sorting") + for element in sorted_data: + for j, tup in enumerate(indexed_data): + i, el = tup + + if np.array_equal(el, element): + nodes.append(i) + indexed_data.pop(j) + return nodes + + +# TODO: Add type hints +def tsp_greedy_solve(node_list, k=100, return_sorted=False): + """Find an approximation of the closest path through a list of coordinates + + Args: + node_list (np.array): Array of shape `(N, 2)` containing a list of coordinates + + k (int, default: 100): Number of Nearest Neighbours calculated for each Node. + + return_sorted: If set to False a list of indices is returned. If set to True the sorted coordinates are returned. + + """ + + sorted_nodes = _tps_greedy_solve(node_list) + + if return_sorted: + return sorted_nodes + + else: + nodes_order = _get_nodes(node_list, sorted_nodes) + return nodes_order From caa5fe132b615191335e8923dc9a447c3eb50012 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:32:33 +0100 Subject: [PATCH 11/44] [Refactor] Use function aliases and deprecate original functions --- src/lmd/segmentation.py | 247 ++++++++++++++++------------------------ 1 file changed, 95 insertions(+), 152 deletions(-) diff --git a/src/lmd/segmentation.py b/src/lmd/segmentation.py index 896fedc..c89fd15 100644 --- a/src/lmd/segmentation.py +++ b/src/lmd/segmentation.py @@ -1,8 +1,8 @@ import gc +import warnings import numba as nb import numpy as np -from hilbertcurve.hilbertcurve import HilbertCurve from numba import njit, prange, types from scipy.sparse import coo_array @@ -154,171 +154,114 @@ def get_coordinate_form(classes, coords_lookup, debug=False): return center, length, coords_filtered -def tsp_hilbert_solve(data, p=3): - p = p - n = 2 - max_n = 2 ** (p * n) - hilbert_curve = HilbertCurve(p, n) - distances = list(range(max_n)) - hilbert_points = hilbert_curve.points_from_distances(distances) - hilbert_points = np.array(hilbert_points) - - data_min = np.min(data, axis=0) - data_max = np.max(data, axis=0) - - hilbert_min = np.min(hilbert_points, axis=0) - hilbert_max = np.max(hilbert_points, axis=0) - - data_scaled = data - data_min - data_scaled = data_scaled / (data_max - data_min) * (hilbert_max - hilbert_min) - - data_rounded = np.round(data_scaled).astype(int) - - order = assign_vertices(hilbert_points, data_rounded) - - return order - - -# TODO: Remove unused argument `world_size` -# TODO: Add type hints -# TODO: Add docstrings -# return the first element not present in a list -def _get_closest(used, choices, world_size): - for element in choices: - if element not in used: - # knn matrix contains -1 if the number of elements is smaller than k - if element == -1: - return None - else: - return element - - return None - # all choices have been taken, return closest free index due to local optimality - - -# TODO: Rename to TSP (traveling sales person) -# TODO: Add type hints -# TODO: Add umap as optional dependency -def _tps_greedy_solve(data, k=100): - samples = len(data) +# ============================================================================= +# Deprecation Aliases - Path Optimization Functions +# ============================================================================= +# These functions have been moved to lmd.path module. +# Imports from lmd.segmentation are deprecated and will be removed in v3.0.0 + +from lmd.path import ( + _get_closest as __get_closest, +) +from lmd.path import ( + _get_nodes as __get_nodes, +) +from lmd.path import ( + _tps_greedy_solve as __tps_greedy_solve, +) +from lmd.path import ( + assign_vertices as _assign_vertices, +) +from lmd.path import ( + calc_len as _calc_len, +) +from lmd.path import ( + tsp_greedy_solve as _tsp_greedy_solve, +) +from lmd.path import ( + tsp_hilbert_solve as _tsp_hilbert_solve, +) - print(f"{samples} nodes left") - # recursive abort - if samples == 1: - return data - import umap - - knn_index, knn_dist, _ = umap.umap_.nearest_neighbors( - data, n_neighbors=k, metric="euclidean", metric_kwds={}, angular=True, random_state=np.random.RandomState(42) +def calc_len(data): + """Deprecated: Use lmd.path.calc_len instead.""" + warnings.warn( + "calc_len has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import calc_len", + DeprecationWarning, + stacklevel=2, ) + return _calc_len(data) - knn_index = knn_index[:, 1:] - knn_dist = knn_dist[:, 1:] - - # follow greedy knn as long as a nearest neighbour is found in the current tree - nodes = [] - current_node = 0 - while current_node is not None: - nodes.append(current_node) - # print(current_node, knn_index[current_node], next_node) - next_node = _get_closest(nodes, knn_index[current_node], samples) - - current_node = next_node - - # as soon as no nearest neigbour can be found, create a new list of all elements still remeining - # nodes: [0, 2, 5], nodes_left: [1, 3, 4, 6, 7, 8, 9] - # add the last node assigned as starting point to the new list - # nodes: [0, 2], nodes_left: [5, 1, 3, 4, 6, 7, 8, 9] - - nodes_left = list(set(range(samples)) - set(nodes)) - - # add last node from nodes to nodes_left - - nodes_left = [nodes.pop(-1)] + nodes_left - - node_data_left = data[nodes_left] - - # join lists - - return np.concatenate([data[nodes], _tps_greedy_solve(node_data_left, k=k)]) - - -# TODO: Add type hints -# TODO: Add docstrings -# calculate the index array for a sorted 2d list based on an unsorted list -@njit() -def _get_nodes(data, sorted_data): - indexed_data = list(enumerate(data)) - - nodes = [] - print("start sorting") - for element in sorted_data: - for j, tup in enumerate(indexed_data): - i, el = tup - - if np.array_equal(el, element): - nodes.append(i) - indexed_data.pop(j) - return nodes +def tsp_hilbert_solve(data, p=3): + """Deprecated: Use lmd.path.tsp_hilbert_solve instead.""" + warnings.warn( + "tsp_hilbert_solve has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import tsp_hilbert_solve", + DeprecationWarning, + stacklevel=2, + ) + return _tsp_hilbert_solve(data, p=p) -# TODO: Add type hints def tsp_greedy_solve(node_list, k=100, return_sorted=False): - """Find an approximation of the closest path through a list of coordinates - - Args: - node_list (np.array): Array of shape `(N, 2)` containing a list of coordinates - - k (int, default: 100): Number of Nearest Neighbours calculated for each Node. - - return_sorted: If set to False a list of indices is returned. If set to True the sorted coordinates are returned. - - """ - - sorted_nodes = _tps_greedy_solve(node_list) - - if return_sorted: - return sorted_nodes - - else: - nodes_order = _get_nodes(node_list, sorted_nodes) - return nodes_order + """Deprecated: Use lmd.path.tsp_greedy_solve instead.""" + warnings.warn( + "tsp_greedy_solve has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import tsp_greedy_solve", + DeprecationWarning, + stacklevel=2, + ) + return _tsp_greedy_solve(node_list, k=k, return_sorted=return_sorted) -@njit() def assign_vertices(hilbert_points, data_rounded): - data_rounded = data_rounded.astype(np.int64) - hilbert_points = hilbert_points.astype(np.int64) - - output_order = np.zeros(len(data_rounded)).astype(np.int64) - current_index = 0 - - for hilbert_point in hilbert_points: - for i, data_point in enumerate(data_rounded): - if np.array_equal(hilbert_point, data_point): - output_order[current_index] = i - current_index += 1 - - return output_order - - -def calc_len(data): - """calculate the length of a path based on a list of coordinates + """Deprecated: Use lmd.path.assign_vertices instead.""" + warnings.warn( + "assign_vertices has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import assign_vertices", + DeprecationWarning, + stacklevel=2, + ) + return _assign_vertices(hilbert_points, data_rounded) - Args: - data (np.array): Array of shape `(N, 2)` containing a list of coordinates - """ +def _get_closest(used, choices, world_size): + """Deprecated: Use lmd.path._get_closest instead.""" + warnings.warn( + "_get_closest has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import _get_closest", + DeprecationWarning, + stacklevel=2, + ) + return __get_closest(used, choices, world_size) - index = np.arange(len(data)).astype(int) - not_shifted = data[index[:-1]] - shifted = data[index[1:]] +def _get_nodes(data, sorted_data): + """Deprecated: Use lmd.path._get_nodes instead.""" + warnings.warn( + "_get_nodes has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import _get_nodes", + DeprecationWarning, + stacklevel=2, + ) + return __get_nodes(data, sorted_data) - diff = not_shifted - shifted - sq = np.square(diff) - dist = np.sum(np.sqrt(np.sum(sq, axis=1))) - return dist +def _tps_greedy_solve(data, k=100): + """Deprecated: Use lmd.path._tps_greedy_solve instead.""" + warnings.warn( + "_tps_greedy_solve has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import _tps_greedy_solve", + DeprecationWarning, + stacklevel=2, + ) + return __tps_greedy_solve(data, k=k) From 66e19610944342edbaf9fa66a13a2242fe6afa98 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:33:11 +0100 Subject: [PATCH 12/44] Move imports to top of module --- src/lmd/segmentation.py | 56 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/lmd/segmentation.py b/src/lmd/segmentation.py index c89fd15..d567b28 100644 --- a/src/lmd/segmentation.py +++ b/src/lmd/segmentation.py @@ -6,6 +6,33 @@ from numba import njit, prange, types from scipy.sparse import coo_array +# ============================================================================= +# Deprecation Aliases - Path Optimization Functions +# ============================================================================= +# These functions have been moved to lmd.path module. +# Imports from lmd.segmentation are deprecated and will be removed in v3.0.0 +from lmd.path import ( + _get_closest as __get_closest, +) +from lmd.path import ( + _get_nodes as __get_nodes, +) +from lmd.path import ( + _tps_greedy_solve as __tps_greedy_solve, +) +from lmd.path import ( + assign_vertices as _assign_vertices, +) +from lmd.path import ( + calc_len as _calc_len, +) +from lmd.path import ( + tsp_greedy_solve as _tsp_greedy_solve, +) +from lmd.path import ( + tsp_hilbert_solve as _tsp_hilbert_solve, +) + # TODO: Rename index_list to index_dict to correctly represent type # TODO: Rename index_list to index_dict to correctly represent type @@ -154,35 +181,6 @@ def get_coordinate_form(classes, coords_lookup, debug=False): return center, length, coords_filtered -# ============================================================================= -# Deprecation Aliases - Path Optimization Functions -# ============================================================================= -# These functions have been moved to lmd.path module. -# Imports from lmd.segmentation are deprecated and will be removed in v3.0.0 - -from lmd.path import ( - _get_closest as __get_closest, -) -from lmd.path import ( - _get_nodes as __get_nodes, -) -from lmd.path import ( - _tps_greedy_solve as __tps_greedy_solve, -) -from lmd.path import ( - assign_vertices as _assign_vertices, -) -from lmd.path import ( - calc_len as _calc_len, -) -from lmd.path import ( - tsp_greedy_solve as _tsp_greedy_solve, -) -from lmd.path import ( - tsp_hilbert_solve as _tsp_hilbert_solve, -) - - def calc_len(data): """Deprecated: Use lmd.path.calc_len instead.""" warnings.warn( From 37678bb49fc40be53e67c832cb4a6064efa3aef2 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:40:38 +0100 Subject: [PATCH 13/44] [Test] Move unittests to test_path.py and validate that they are equivalent --- tests/unit/test_lmd/test_path.py | 170 +++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 tests/unit/test_lmd/test_path.py diff --git a/tests/unit/test_lmd/test_path.py b/tests/unit/test_lmd/test_path.py new file mode 100644 index 0000000..4c0e3c0 --- /dev/null +++ b/tests/unit/test_lmd/test_path.py @@ -0,0 +1,170 @@ +"""Tests for lmd.path module - path optimization functions.""" + +from typing import Optional + +import numpy as np +import pytest + +from lmd.path import ( + _get_closest, + _get_nodes, + assign_vertices, + calc_len, + tsp_hilbert_solve, +) + +ATOL = 1e-10 + + +@pytest.mark.parametrize( + ("data", "p"), + [ + # Two points - should return valid ordering + (np.array([[0.0, 0.0], [1.0, 1.0]]), 3), + # Four corner points + (np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), 3), + # Linear points + (np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]), 3), + # More points with different p value + (np.array([[0.0, 0.0], [0.25, 0.25], [0.5, 0.5], [0.75, 0.75], [1.0, 1.0]]), 4), + ], + ids=("two_points", "four_corners", "linear_points", "five_points_p4"), +) +def test_tsp_hilbert_solve(data: np.ndarray, p: int) -> None: + """Test `tsp_hilbert_solve` returns valid ordering""" + # Act + result = tsp_hilbert_solve(data=data, p=p) + + # Assert - result should be a permutation of indices + assert len(result) == len(data) + assert set(result) == set(range(len(data))) + + +@pytest.mark.parametrize( + ("used", "choices", "world_size", "expected_result"), + [ + # First element not in used is returned + ([], [0, 1, 2], 10, 0), + # First element is used, return second + ([0], [0, 1, 2], 10, 1), + # First two elements used, return third + ([0, 1], [0, 1, 2], 10, 2), + # All elements are used, return None + ([0, 1, 2], [0, 1, 2], 10, None), + # First available element is -1, return None + ([0], [0, -1, 2], 10, None), + # -1 at start and not in used returns None + ([], [-1, 1, 2], 10, None), + # Skip used elements to find first available + ([0, 2], [0, 2, 1, 3], 10, 1), + # Empty choices returns None + ([0], [], 10, None), + ], + ids=( + "first_available", + "skip_one_used", + "skip_two_used", + "all_used", + "minus_one_element", + "minus_one_first", + "non_sequential_choices", + "empty_choices", + ), +) +def test__get_closest(used: list[int], choices: list[int], world_size: int, expected_result: Optional[int]) -> None: + """Test `_get_closest` for finding first unused element from choices""" + # Act + result = _get_closest(used=used, choices=choices, world_size=world_size) + + # Assert + assert result == expected_result + + +# TODO: Rename test when fixing function name +# Note: _tps_greedy_solve and tsp_greedy_solve require umap dependency and are tested in integration tests + + +@pytest.mark.parametrize( + ("data", "sorted_data", "expected_nodes"), + [ + # Single element + (np.array([[0, 0]]), np.array([[0, 0]]), [0]), + # Already sorted - indices are 0, 1, 2 + ( + np.array([[0, 0], [1, 1], [2, 2]]), + np.array([[0, 0], [1, 1], [2, 2]]), + [0, 1, 2], + ), + # Reversed order - indices map sorted back to original positions + ( + np.array([[2, 2], [1, 1], [0, 0]]), + np.array([[0, 0], [1, 1], [2, 2]]), + [2, 1, 0], + ), + # Arbitrary reordering + ( + np.array([[1, 0], [0, 1], [2, 2], [0, 0]]), + np.array([[0, 0], [1, 0], [0, 1], [2, 2]]), + [3, 0, 1, 2], + ), + ], + ids=("single_element", "already_sorted", "reversed", "arbitrary_reorder"), +) +def test__get_nodes(data: np.ndarray, sorted_data: np.ndarray, expected_nodes: list[int]) -> None: + """Test `_get_nodes` for calculating index array mapping sorted to original data""" + result = _get_nodes(data=data, sorted_data=sorted_data) + + assert list(result) == expected_nodes + + +@pytest.mark.parametrize( + ("hilbert_points", "data_rounded", "expected_order"), + [ + # Single point + (np.array([[0, 0]]), np.array([[0, 0]]), np.array([0])), + # Two points in hilbert order + (np.array([[0, 0], [1, 0]]), np.array([[0, 0], [1, 0]]), np.array([0, 1])), + # Two points in reverse hilbert order - order reflects hilbert traversal + (np.array([[0, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), np.array([1, 0])), + # Three points with reordering + ( + np.array([[0, 0], [0, 1], [1, 1]]), + np.array([[1, 1], [0, 0], [0, 1]]), + np.array([1, 2, 0]), + ), + # Points not all on hilbert curve - only matching points are ordered + ( + np.array([[0, 0], [1, 0], [2, 0]]), + np.array([[0, 0], [2, 0]]), + np.array([0, 1]), + ), + ], + ids=( + "single_point", + "two_points_in_order", + "two_points_reversed", + "three_points_reorder", + "partial_match", + ), +) +def test_assign_vertices(hilbert_points: np.ndarray, data_rounded: np.ndarray, expected_order: np.ndarray) -> None: + """Test `assign_vertices` for ordering data points along hilbert curve""" + result = assign_vertices(hilbert_points=hilbert_points, data_rounded=data_rounded) + + assert np.array_equal(result[: len(expected_order)], expected_order) + + +@pytest.mark.parametrize( + ("array", "expected_result"), + [ + (np.array([[0], [1], [2]]), 2), + (np.array([[0, 0], [1, 0], [2, 0]]), 2), + (np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]), 4), + ], + ids=("1d_like", "linear", "square_circular_path"), +) +def test_calc_len(array: np.ndarray, expected_result: float) -> None: + """Test `calc_len` for the computation of a path length""" + result = calc_len(array) + + assert result == expected_result From c0bfaf1bb513e669424199739c3887c8e7adcea7 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:41:37 +0100 Subject: [PATCH 14/44] [Test] Add tests for deprecation warnings --- tests/unit/test_lmd/test_segmentation.py | 225 +++++++++-------------- 1 file changed, 82 insertions(+), 143 deletions(-) diff --git a/tests/unit/test_lmd/test_segmentation.py b/tests/unit/test_lmd/test_segmentation.py index 14bc766..3f5d16c 100644 --- a/tests/unit/test_lmd/test_segmentation.py +++ b/tests/unit/test_lmd/test_segmentation.py @@ -1,4 +1,4 @@ -from typing import Optional +import warnings import numpy as np import pytest @@ -7,13 +7,8 @@ _create_coord_index, _create_coord_index_sparse, _filter_coord_index, - _get_closest, - _get_nodes, _numba_accelerator_coord_calculation, - assign_vertices, - calc_len, get_coordinate_form, - tsp_hilbert_solve, ) ATOL = 1e-10 @@ -251,155 +246,99 @@ def test_get_coordinate_form( assert np.allclose(cf, ecf, atol=ATOL) -@pytest.mark.parametrize( - ("data", "p"), - [ - # Two points - should return valid ordering - (np.array([[0.0, 0.0], [1.0, 1.0]]), 3), - # Four corner points - (np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), 3), - # Linear points - (np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]), 3), - # More points with different p value - (np.array([[0.0, 0.0], [0.25, 0.25], [0.5, 0.5], [0.75, 0.75], [1.0, 1.0]]), 4), - ], - ids=("two_points", "four_corners", "linear_points", "five_points_p4"), -) -def test_tsp_hilbert_solve(data: np.ndarray, p: int) -> None: - """Test `tsp_hilbert_solve` returns valid ordering""" - # Act - result = tsp_hilbert_solve(data=data, p=p) +# ============================================================================= +# Deprecation Warning Tests +# ============================================================================= +# These tests verify that importing path functions from lmd.segmentation +# raises deprecation warnings. The actual function tests are in test_path.py. - # Assert - result should be a permutation of indices - assert len(result) == len(data) - assert set(result) == set(range(len(data))) +class TestDeprecationWarnings: + """Test that deprecated imports from lmd.segmentation raise DeprecationWarning.""" -@pytest.mark.parametrize( - ("used", "choices", "world_size", "expected_result"), - [ - # First element not in used is returned - ([], [0, 1, 2], 10, 0), - # First element is used, return second - ([0], [0, 1, 2], 10, 1), - # First two elements used, return third - ([0, 1], [0, 1, 2], 10, 2), - # All elements are used, return None - ([0, 1, 2], [0, 1, 2], 10, None), - # First available element is -1, return None - ([0], [0, -1, 2], 10, None), - # -1 at start and not in used returns None - ([], [-1, 1, 2], 10, None), - # Skip used elements to find first available - ([0, 2], [0, 2, 1, 3], 10, 1), - # Empty choices returns None - ([0], [], 10, None), - ], - ids=( - "first_available", - "skip_one_used", - "skip_two_used", - "all_used", - "minus_one_element", - "minus_one_first", - "non_sequential_choices", - "empty_choices", - ), -) -def test__get_closest(used: list[int], choices: list[int], world_size: int, expected_result: Optional[int]) -> None: - """Test `_get_closest` for finding first unused element from choices""" - # Act - result = _get_closest(used=used, choices=choices, world_size=world_size) + def test_calc_len_deprecation_warning(self) -> None: + """Test that calc_len raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import calc_len - # Assert - assert result == expected_result + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + calc_len(np.array([[0, 0], [1, 1]])) + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "calc_len has been moved to lmd.path" in str(w[0].message) -# TODO: Rename test when fixing function name -# Note: _tps_greedy_solve and tsp_greedy_solve require umap dependency and are tested in integration tests + def test_tsp_hilbert_solve_deprecation_warning(self) -> None: + """Test that tsp_hilbert_solve raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import tsp_hilbert_solve + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + tsp_hilbert_solve(np.array([[0.0, 0.0], [1.0, 1.0]]), p=3) -@pytest.mark.parametrize( - ("data", "sorted_data", "expected_nodes"), - [ - # Single element - (np.array([[0, 0]]), np.array([[0, 0]]), [0]), - # Already sorted - indices are 0, 1, 2 - ( - np.array([[0, 0], [1, 1], [2, 2]]), - np.array([[0, 0], [1, 1], [2, 2]]), - [0, 1, 2], - ), - # Reversed order - indices map sorted back to original positions - ( - np.array([[2, 2], [1, 1], [0, 0]]), - np.array([[0, 0], [1, 1], [2, 2]]), - [2, 1, 0], - ), - # Arbitrary reordering - ( - np.array([[1, 0], [0, 1], [2, 2], [0, 0]]), - np.array([[0, 0], [1, 0], [0, 1], [2, 2]]), - [3, 0, 1, 2], - ), - ], - ids=("single_element", "already_sorted", "reversed", "arbitrary_reorder"), -) -def test__get_nodes(data: np.ndarray, sorted_data: np.ndarray, expected_nodes: list[int]) -> None: - """Test `_get_nodes` for calculating index array mapping sorted to original data""" - result = _get_nodes(data=data, sorted_data=sorted_data) + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "tsp_hilbert_solve has been moved to lmd.path" in str(w[0].message) - assert list(result) == expected_nodes + def test_tsp_greedy_solve_deprecation_warning(self) -> None: + """Test that tsp_greedy_solve raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import tsp_greedy_solve + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + # Note: tsp_greedy_solve requires umap, so we just check the warning is raised + # before the actual call would happen + tsp_greedy_solve(np.array([[0.0, 0.0], [1.0, 1.0]])) -@pytest.mark.parametrize( - ("hilbert_points", "data_rounded", "expected_order"), - [ - # Single point - (np.array([[0, 0]]), np.array([[0, 0]]), np.array([0])), - # Two points in hilbert order - (np.array([[0, 0], [1, 0]]), np.array([[0, 0], [1, 0]]), np.array([0, 1])), - # Two points in reverse hilbert order - order reflects hilbert traversal - (np.array([[0, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), np.array([1, 0])), - # Three points with reordering - ( - np.array([[0, 0], [0, 1], [1, 1]]), - np.array([[1, 1], [0, 0], [0, 1]]), - np.array([1, 2, 0]), - ), - # Points not all on hilbert curve - only matching points are ordered - ( - np.array([[0, 0], [1, 0], [2, 0]]), - np.array([[0, 0], [2, 0]]), - np.array([0, 1]), - ), - ], - ids=( - "single_point", - "two_points_in_order", - "two_points_reversed", - "three_points_reorder", - "partial_match", - ), -) -def test_assign_vertices(hilbert_points: np.ndarray, data_rounded: np.ndarray, expected_order: np.ndarray) -> None: - """Test `assign_vertices` for ordering data points along hilbert curve""" - result = assign_vertices(hilbert_points=hilbert_points, data_rounded=data_rounded) + assert len(w) >= 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "tsp_greedy_solve has been moved to lmd.path" in str(w[0].message) - assert np.array_equal(result[: len(expected_order)], expected_order) + def test_assign_vertices_deprecation_warning(self) -> None: + """Test that assign_vertices raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import assign_vertices + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assign_vertices(np.array([[0, 0]]), np.array([[0, 0]])) -@pytest.mark.parametrize( - ("array", "expected_result"), - [ - (np.array([[0], [1], [2]]), 2), - (np.array([[0, 0], [1, 0], [2, 0]]), 2), - (np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]), 4), - ], - ids=("1d_like", "linear", "square_circular_path"), -) -def test_calc_len(array: np.ndarray, expected_result: float) -> None: - """Test `calc_len` for the computation of a path length""" - result = calc_len(array) + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "assign_vertices has been moved to lmd.path" in str(w[0].message) + + def test__get_closest_deprecation_warning(self) -> None: + """Test that _get_closest raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import _get_closest + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _get_closest([], [0, 1, 2], 10) + + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "_get_closest has been moved to lmd.path" in str(w[0].message) + + def test__get_nodes_deprecation_warning(self) -> None: + """Test that _get_nodes raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import _get_nodes + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _get_nodes(np.array([[0, 0]]), np.array([[0, 0]])) + + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "_get_nodes has been moved to lmd.path" in str(w[0].message) + + def test__tps_greedy_solve_deprecation_warning(self) -> None: + """Test that _tps_greedy_solve raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import _tps_greedy_solve + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + _tps_greedy_solve(np.array([[0.0, 0.0], [1.0, 1.0]])) - assert result == expected_result + assert len(w) >= 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "_tps_greedy_solve has been moved to lmd.path" in str(w[0].message) From bec0e928a8313cc5ec2595782a19161d76f39416 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:52:38 +0100 Subject: [PATCH 15/44] [Refactor] Fix typo. Rename _tps_greedy_solve to _tsp_greedy_solve and update deprecation warning --- src/lmd/path.py | 7 +++---- src/lmd/segmentation.py | 10 +++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/lmd/path.py b/src/lmd/path.py index bbd8d0b..38f98e2 100644 --- a/src/lmd/path.py +++ b/src/lmd/path.py @@ -94,10 +94,9 @@ def _get_closest(used, choices, world_size): # all choices have been taken, return closest free index due to local optimality -# TODO: Rename to TSP (traveling sales person) # TODO: Add type hints # TODO: Add umap as optional dependency -def _tps_greedy_solve(data, k=100): +def _tsp_greedy_solve(data, k=100): samples = len(data) print(f"{samples} nodes left") @@ -139,7 +138,7 @@ def _tps_greedy_solve(data, k=100): # join lists - return np.concatenate([data[nodes], _tps_greedy_solve(node_data_left, k=k)]) + return np.concatenate([data[nodes], _tsp_greedy_solve(node_data_left, k=k)]) # TODO: Add type hints @@ -175,7 +174,7 @@ def tsp_greedy_solve(node_list, k=100, return_sorted=False): """ - sorted_nodes = _tps_greedy_solve(node_list) + sorted_nodes = _tsp_greedy_solve(node_list) if return_sorted: return sorted_nodes diff --git a/src/lmd/segmentation.py b/src/lmd/segmentation.py index d567b28..6ba3e97 100644 --- a/src/lmd/segmentation.py +++ b/src/lmd/segmentation.py @@ -18,7 +18,7 @@ _get_nodes as __get_nodes, ) from lmd.path import ( - _tps_greedy_solve as __tps_greedy_solve, + _tsp_greedy_solve as __tsp_greedy_solve, ) from lmd.path import ( assign_vertices as _assign_vertices, @@ -254,12 +254,12 @@ def _get_nodes(data, sorted_data): def _tps_greedy_solve(data, k=100): - """Deprecated: Use lmd.path._tps_greedy_solve instead.""" + """Deprecated: Use lmd.path._tsp_greedy_solve instead.""" warnings.warn( - "_tps_greedy_solve has been moved to lmd.path. " + "_tps_greedy_solve has been moved to lmd.path and renamed to _tsp_greedy_solve. " "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " - "Please use: from lmd.path import _tps_greedy_solve", + "Please use: from lmd.path import _tsp_greedy_solve", DeprecationWarning, stacklevel=2, ) - return __tps_greedy_solve(data, k=k) + return __tsp_greedy_solve(data, k=k) From 453bbd509906916748e709e4dd8fff8834a30993 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:00:04 +0100 Subject: [PATCH 16/44] Add optional umap dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ef14f37..b31372d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ classifiers = [ dependencies = {file = ["requirements.txt"]} [tool.setuptools.dynamic.optional-dependencies] +umap = {"umap-learn"} dev_only = {file = ["requirements_dev.txt"]} doc = {file = ["requirements_doc.txt"]} test = {file = ["requirements_test.txt"]} From 467aa11c8a810267b1f3641914893a6e4a4e45f2 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:01:42 +0100 Subject: [PATCH 17/44] [Dep] Add umap as optional dependency --- pyproject.toml | 2 +- requirements_umap.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100755 requirements_umap.txt diff --git a/pyproject.toml b/pyproject.toml index b31372d..42780e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ dependencies = {file = ["requirements.txt"]} [tool.setuptools.dynamic.optional-dependencies] -umap = {"umap-learn"} +umap = {file = ["requirements_umap.txt"]} dev_only = {file = ["requirements_dev.txt"]} doc = {file = ["requirements_doc.txt"]} test = {file = ["requirements_test.txt"]} diff --git a/requirements_umap.txt b/requirements_umap.txt new file mode 100755 index 0000000..51f1982 --- /dev/null +++ b/requirements_umap.txt @@ -0,0 +1,2 @@ +pre-commit +pytest From d1606331b8fc59acccd7ed1e7c9529165b9fbd0d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:03:36 +0100 Subject: [PATCH 18/44] Install optional umap dependency for CI/CD tests --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 4da6e90..6cd4ba7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,7 +27,7 @@ jobs: - name: Install package run: | pip install uv - uv pip install --system ".[tests, dev]" + uv pip install --system ".[tests, dev, umap]" - name: Run pre-commit checks run: | pre-commit install From 5842880099ac8e4ca3550a71e207e85cee09cce0 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:04:36 +0100 Subject: [PATCH 19/44] [Dep] Fix requirements file and specify umap --- requirements_umap.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements_umap.txt b/requirements_umap.txt index 51f1982..cfb6acd 100755 --- a/requirements_umap.txt +++ b/requirements_umap.txt @@ -1,2 +1 @@ -pre-commit -pytest +umap-learn From 4e21c26513ba95a67e41880ba616e4a57084524d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:05:09 +0100 Subject: [PATCH 20/44] [feature] Add ImportError when umap is not installed --- src/lmd/path.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lmd/path.py b/src/lmd/path.py index 38f98e2..a1536d5 100644 --- a/src/lmd/path.py +++ b/src/lmd/path.py @@ -104,7 +104,12 @@ def _tsp_greedy_solve(data, k=100): if samples == 1: return data - import umap + try: + import umap + except ImportError: + raise ImportError( + "umap-learn is required for this function. " "Install it with: pip install py-lmd[umap]" + ) from None knn_index, knn_dist, _ = umap.umap_.nearest_neighbors( data, n_neighbors=k, metric="euclidean", metric_kwds={}, angular=True, random_state=np.random.RandomState(42) From 289b4b6391053cdd937449af0e9af77c0b0e3e29 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:24:58 +0100 Subject: [PATCH 21/44] _geom: Remove unused import and sort import block --- src/lmd/lib/_geom.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lmd/lib/_geom.py b/src/lmd/lib/_geom.py index 9436888..1209f4f 100644 --- a/src/lmd/lib/_geom.py +++ b/src/lmd/lib/_geom.py @@ -3,8 +3,6 @@ from __future__ import annotations import re - -# import warnings import warnings from typing import Any From 82d355d99132749d5d8ebabb51b75adab40a3af9 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:25:20 +0100 Subject: [PATCH 22/44] _segmentation: Remove unused import and sort import block --- src/lmd/lib/_segmentation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index fb29d30..a8b2e50 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -5,8 +5,6 @@ import os import platform import sys - -# import warnings import warnings from functools import partial, reduce from pathlib import Path From 53874cc0066076e75acdb61f9c8ff8495aecf513 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:27:06 +0100 Subject: [PATCH 23/44] Fix typo --- src/lmd/lib/_segmentation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index a8b2e50..550b472 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -115,7 +115,7 @@ class SegmentationLoader: # valid options are ["none", "hilbert", "greedy"] path_optimization: "hilbert" - # Paramter required for hilbert curve based path optimization. + # Parameter required for hilbert curve based path optimization. # Defines the order of the hilbert curve used, which needs to be tuned with the total cutting area. # For areas of 1 x 1 mm we recommend at least p = 4, for whole slides we recommend p = 7. hilbert_p: 7 @@ -123,7 +123,7 @@ class SegmentationLoader: # Parameter required for greedy path optimization. # Instead of a global distance matrix, the k nearest neighbours are approximated. # The optimization problem is then greedily solved for the known set of nearest neighbours until the first set of neighbours is exhausted. - # Established edges are then removed and the nearest neighbour approximation is recursivly repeated. + # Established edges are then removed and the nearest neighbour approximation is recursively repeated. greedy_k: 20 # Overlapping shapes are merged based on a nearest neighbour heuristic. From b707dafacc167d7682ba539c925827ef5e422ca4 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:30:42 +0100 Subject: [PATCH 24/44] Fix docstring --- src/lmd/lib/_segmentation.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index 550b472..6358eb8 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -35,13 +35,9 @@ class SegmentationLoader: config (dict): Dict containing configuration parameters. See Note for further explanation. processes (int): Number of processes used for parallel processing of cell sets. Total processes can be calculated as `processes * threads`. threads (int): Number of threads used for parallel processing of shapes within a cell set. Total processes can be calculated as `processes * threads`. - cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. - - calibration_marker (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. - + calibration_points (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. coords_lookup (None, dict): precalculated lookup table for coordinates of individual cell ids. If not provided will be calculated. - classes (np.array): Array of classes found in the provided segmentation mask. If not provided will be calculated based on the assumption that cell_ids are assigned in ascending order. Example: @@ -52,13 +48,6 @@ class SegmentationLoader: from PIL import Image from lmd.lib import SegmentationLoader - Args: - config (dict): Dict containing configuration parameters. See Note for further explanation. - - cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. - - calibration_marker (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. - Example: .. code-block:: python From 512fca464ba13f92b4fbaa2a7ba609b74d5aeb9d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:33:32 +0100 Subject: [PATCH 25/44] Sort import block --- src/lmd/lib/_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lmd/lib/_utils.py b/src/lmd/lib/_utils.py index 788d599..161d282 100644 --- a/src/lmd/lib/_utils.py +++ b/src/lmd/lib/_utils.py @@ -5,8 +5,6 @@ import matplotlib.pyplot as plt import numpy as np - -# import warnings from rdp import rdp from scipy import ndimage from scipy.signal import convolve2d From fa4417d752d20ac0b9a62299d03ca0e8ba66ff6c Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:33:41 +0100 Subject: [PATCH 26/44] Fix typo --- tests/unit/test_lmd/test_lib/test_lib_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_lmd/test_lib/test_lib_utils.py b/tests/unit/test_lmd/test_lib/test_lib_utils.py index d74f030..cabec85 100644 --- a/tests/unit/test_lmd/test_lib/test_lib_utils.py +++ b/tests/unit/test_lmd/test_lib/test_lib_utils.py @@ -96,7 +96,7 @@ class TestTransformToMap: @pytest.mark.parametrize( ("coords",), [(np.array([[0, 0]]),), (np.array([[2, 1]]),), (np.array([[0, 0], [2, 1]]),)], - ids=("simple", "assymmetric", "multiple_coords"), + ids=("simple", "asymmetric", "multiple_coords"), ) def test_transform_to_map__no_changes__coord_format(self, coords: np.ndarray) -> None: """Test that `coord_format=True` just returns the coords if no dilation/erosion is applied""" @@ -119,7 +119,7 @@ def test_transform_to_map__no_changes__coord_format(self, coords: np.ndarray) -> np.array([[0, 0], [100, 100]]), ), # boundary left/right, no offset ], - ids=("simple", "assymmetric", "multiple_coords", "with_offsets", "large_without_offsets"), + ids=("simple", "asymmetric", "multiple_coords", "with_offsets", "large_without_offsets"), ) def test_transform_to_map__no_changes__mask_format( self, From 528e7db012e1286a2d6bf46a8bffacf7e6f54065 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:34:05 +0100 Subject: [PATCH 27/44] Fix docstring with redundant arguments --- src/lmd/lib/_segmentation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index 6358eb8..f525ef1 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -33,11 +33,17 @@ class SegmentationLoader: Args: config (dict): Dict containing configuration parameters. See Note for further explanation. + processes (int): Number of processes used for parallel processing of cell sets. Total processes can be calculated as `processes * threads`. + threads (int): Number of threads used for parallel processing of shapes within a cell set. Total processes can be calculated as `processes * threads`. + cell_sets (list(dict)): List of dictionaries containing the sets of cells which should be sorted into a single well. + calibration_points (np.array): Array of size '(3,2)' containing the calibration marker coordinates in the '(row, column)' format. + coords_lookup (None, dict): precalculated lookup table for coordinates of individual cell ids. If not provided will be calculated. + classes (np.array): Array of classes found in the provided segmentation mask. If not provided will be calculated based on the assumption that cell_ids are assigned in ascending order. Example: From 2cdd07f4098441be8b6b5ed415eafd647a47b4b1 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:34:41 +0100 Subject: [PATCH 28/44] Fix typo --- src/lmd/lib/_segmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index f525ef1..e0ee126 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -581,7 +581,7 @@ def register_parameter(self, key, value): raise NotImplementedError("registration of parameters is not yet supported for nested parameters") else: - raise TypeError("Key musst be of string or a list of strings") + raise TypeError("Key must be of string or a list of strings") if key not in config_handle: self.log(f"No configuration for {key} found, parameter will be set to {value}") From 075bef40a77de548640bb15e2747b2defe12a5fe Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:35:06 +0100 Subject: [PATCH 29/44] Fix typo --- src/lmd/lib/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmd/lib/_utils.py b/src/lmd/lib/_utils.py index 161d282..b3a27af 100644 --- a/src/lmd/lib/_utils.py +++ b/src/lmd/lib/_utils.py @@ -48,7 +48,7 @@ def _execute_indexed_parallel(func: Callable, *, args: list, tqdm_args: dict = N # TODO: Add type hints # TODO: Add docstring to public method def transform_to_map(coords, dilation=0, erosion=0, coord_format=True, debug=False): - # safety boundary which extands the generated map size + # safety boundary which extends the generated map size safety_offset = 3 dilation_offset = int(dilation) From 0870b61e2b9b91dd37d75905a820d3abed076d4b Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:36:16 +0100 Subject: [PATCH 30/44] Add todo --- src/lmd/lib/_segmentation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index e0ee126..7791bbd 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -375,6 +375,7 @@ def generate_cutting_data(self, i: int, cell_set: dict) -> Collection: optimized_length = calc_len(center) self.log(f"Optimized path length: {optimized_length:,.2f} units") + # TODO: Remove unused variable optimization_factor optimization_factor = unoptimized_length / optimized_length self.log(f"Optimization factor: {optimization_factor:,.1f}x") else: From a2e9131d875a970d1262ba58a208e633bf634b5b Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:37:48 +0100 Subject: [PATCH 31/44] Add TODO --- src/lmd/lib/_segmentation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index 7791bbd..0cc89ad 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -550,6 +550,7 @@ def load_classes(self, cell_set): else: path = os.path.join(Path(self.directory).parents[0], cell_set["classes"]) + # TODO: Close file again https://github.com/MannLabs/py-lmd/pull/61#discussion_r2692370486 if os.path.isfile(path): try: cr = csv.reader(open(path)) From b650ce5ca477032a155ff1c5c0de59326b81b426 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Sat, 17 Jan 2026 11:37:59 +0100 Subject: [PATCH 32/44] Raise type error --- src/lmd/lib/_geom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmd/lib/_geom.py b/src/lmd/lib/_geom.py index 1209f4f..d072f54 100644 --- a/src/lmd/lib/_geom.py +++ b/src/lmd/lib/_geom.py @@ -361,7 +361,7 @@ def add_shape(self, shape: Shape): if isinstance(shape, Shape): self.shapes.append(shape) else: - TypeError("Provided shape is not of type Shape") + raise TypeError("Provided shape is not of type Shape") def new_shape( self, From cd77ff1d6976a33a6619bbfd8de32914a328ab8d Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:30:06 +0100 Subject: [PATCH 33/44] [API] Update imports in segmenation module --- src/lmd/lib/_segmentation.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/lmd/lib/_segmentation.py b/src/lmd/lib/_segmentation.py index 0cc89ad..28bec8e 100644 --- a/src/lmd/lib/_segmentation.py +++ b/src/lmd/lib/_segmentation.py @@ -16,13 +16,8 @@ from scipy.spatial import cKDTree from tqdm.auto import tqdm -from lmd.segmentation import ( - _create_coord_index_sparse, - calc_len, - get_coordinate_form, - tsp_greedy_solve, - tsp_hilbert_solve, -) +from lmd.path import calc_len, tsp_greedy_solve, tsp_hilbert_solve +from lmd.segmentation import _create_coord_index_sparse, get_coordinate_form from ._geom import Collection from ._utils import _create_poly, _execute_indexed_parallel, transform_to_map From 211b07d62997a18bf11f9d5eed839b2519533715 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:30:43 +0100 Subject: [PATCH 34/44] [Refactor] Move path-optimization-related functions to dedicated module --- src/lmd/path.py | 185 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 src/lmd/path.py diff --git a/src/lmd/path.py b/src/lmd/path.py new file mode 100644 index 0000000..bbd8d0b --- /dev/null +++ b/src/lmd/path.py @@ -0,0 +1,185 @@ +"""Path optimization utilities for LMD cutting path generation. + +This module provides algorithms for solving the Traveling Salesman Problem (TSP) +to optimize the order of cutting shapes, minimizing total travel distance for +laser microdissection operations. + +Functions: + tsp_hilbert_solve: Hilbert curve-based TSP approximation + tsp_greedy_solve: Greedy k-NN based TSP approximation + calc_len: Calculate total path length from coordinates +""" + +import numpy as np +from hilbertcurve.hilbertcurve import HilbertCurve +from numba import njit + + +def calc_len(data): + """calculate the length of a path based on a list of coordinates + + Args: + data (np.array): Array of shape `(N, 2)` containing a list of coordinates + + """ + + index = np.arange(len(data)).astype(int) + + not_shifted = data[index[:-1]] + shifted = data[index[1:]] + + diff = not_shifted - shifted + sq = np.square(diff) + dist = np.sum(np.sqrt(np.sum(sq, axis=1))) + + return dist + + +@njit() +def assign_vertices(hilbert_points, data_rounded): + data_rounded = data_rounded.astype(np.int64) + hilbert_points = hilbert_points.astype(np.int64) + + output_order = np.zeros(len(data_rounded)).astype(np.int64) + current_index = 0 + + for hilbert_point in hilbert_points: + for i, data_point in enumerate(data_rounded): + if np.array_equal(hilbert_point, data_point): + output_order[current_index] = i + current_index += 1 + + return output_order + + +def tsp_hilbert_solve(data, p=3): + p = p + n = 2 + max_n = 2 ** (p * n) + hilbert_curve = HilbertCurve(p, n) + distances = list(range(max_n)) + hilbert_points = hilbert_curve.points_from_distances(distances) + hilbert_points = np.array(hilbert_points) + + data_min = np.min(data, axis=0) + data_max = np.max(data, axis=0) + + hilbert_min = np.min(hilbert_points, axis=0) + hilbert_max = np.max(hilbert_points, axis=0) + + data_scaled = data - data_min + data_scaled = data_scaled / (data_max - data_min) * (hilbert_max - hilbert_min) + + data_rounded = np.round(data_scaled).astype(int) + + order = assign_vertices(hilbert_points, data_rounded) + + return order + + +# TODO: Remove unused argument `world_size` +# TODO: Add type hints +# TODO: Add docstrings +# return the first element not present in a list +def _get_closest(used, choices, world_size): + for element in choices: + if element not in used: + # knn matrix contains -1 if the number of elements is smaller than k + if element == -1: + return None + else: + return element + + return None + # all choices have been taken, return closest free index due to local optimality + + +# TODO: Rename to TSP (traveling sales person) +# TODO: Add type hints +# TODO: Add umap as optional dependency +def _tps_greedy_solve(data, k=100): + samples = len(data) + + print(f"{samples} nodes left") + # recursive abort + if samples == 1: + return data + + import umap + + knn_index, knn_dist, _ = umap.umap_.nearest_neighbors( + data, n_neighbors=k, metric="euclidean", metric_kwds={}, angular=True, random_state=np.random.RandomState(42) + ) + + knn_index = knn_index[:, 1:] + knn_dist = knn_dist[:, 1:] + + # follow greedy knn as long as a nearest neighbour is found in the current tree + nodes = [] + current_node = 0 + while current_node is not None: + nodes.append(current_node) + # print(current_node, knn_index[current_node], next_node) + next_node = _get_closest(nodes, knn_index[current_node], samples) + + current_node = next_node + + # as soon as no nearest neigbour can be found, create a new list of all elements still remeining + # nodes: [0, 2, 5], nodes_left: [1, 3, 4, 6, 7, 8, 9] + # add the last node assigned as starting point to the new list + # nodes: [0, 2], nodes_left: [5, 1, 3, 4, 6, 7, 8, 9] + + nodes_left = list(set(range(samples)) - set(nodes)) + + # add last node from nodes to nodes_left + + nodes_left = [nodes.pop(-1)] + nodes_left + + node_data_left = data[nodes_left] + + # join lists + + return np.concatenate([data[nodes], _tps_greedy_solve(node_data_left, k=k)]) + + +# TODO: Add type hints +# TODO: Add docstrings +# calculate the index array for a sorted 2d list based on an unsorted list +@njit() +def _get_nodes(data, sorted_data): + indexed_data = list(enumerate(data)) + + nodes = [] + + print("start sorting") + for element in sorted_data: + for j, tup in enumerate(indexed_data): + i, el = tup + + if np.array_equal(el, element): + nodes.append(i) + indexed_data.pop(j) + return nodes + + +# TODO: Add type hints +def tsp_greedy_solve(node_list, k=100, return_sorted=False): + """Find an approximation of the closest path through a list of coordinates + + Args: + node_list (np.array): Array of shape `(N, 2)` containing a list of coordinates + + k (int, default: 100): Number of Nearest Neighbours calculated for each Node. + + return_sorted: If set to False a list of indices is returned. If set to True the sorted coordinates are returned. + + """ + + sorted_nodes = _tps_greedy_solve(node_list) + + if return_sorted: + return sorted_nodes + + else: + nodes_order = _get_nodes(node_list, sorted_nodes) + return nodes_order From b81011b425afd8a88eefbdc53d7dbabc9eed83f5 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:32:33 +0100 Subject: [PATCH 35/44] [Refactor] Use function aliases and deprecate original functions --- src/lmd/segmentation.py | 247 ++++++++++++++++------------------------ 1 file changed, 95 insertions(+), 152 deletions(-) diff --git a/src/lmd/segmentation.py b/src/lmd/segmentation.py index 896fedc..c89fd15 100644 --- a/src/lmd/segmentation.py +++ b/src/lmd/segmentation.py @@ -1,8 +1,8 @@ import gc +import warnings import numba as nb import numpy as np -from hilbertcurve.hilbertcurve import HilbertCurve from numba import njit, prange, types from scipy.sparse import coo_array @@ -154,171 +154,114 @@ def get_coordinate_form(classes, coords_lookup, debug=False): return center, length, coords_filtered -def tsp_hilbert_solve(data, p=3): - p = p - n = 2 - max_n = 2 ** (p * n) - hilbert_curve = HilbertCurve(p, n) - distances = list(range(max_n)) - hilbert_points = hilbert_curve.points_from_distances(distances) - hilbert_points = np.array(hilbert_points) - - data_min = np.min(data, axis=0) - data_max = np.max(data, axis=0) - - hilbert_min = np.min(hilbert_points, axis=0) - hilbert_max = np.max(hilbert_points, axis=0) - - data_scaled = data - data_min - data_scaled = data_scaled / (data_max - data_min) * (hilbert_max - hilbert_min) - - data_rounded = np.round(data_scaled).astype(int) - - order = assign_vertices(hilbert_points, data_rounded) - - return order - - -# TODO: Remove unused argument `world_size` -# TODO: Add type hints -# TODO: Add docstrings -# return the first element not present in a list -def _get_closest(used, choices, world_size): - for element in choices: - if element not in used: - # knn matrix contains -1 if the number of elements is smaller than k - if element == -1: - return None - else: - return element - - return None - # all choices have been taken, return closest free index due to local optimality - - -# TODO: Rename to TSP (traveling sales person) -# TODO: Add type hints -# TODO: Add umap as optional dependency -def _tps_greedy_solve(data, k=100): - samples = len(data) +# ============================================================================= +# Deprecation Aliases - Path Optimization Functions +# ============================================================================= +# These functions have been moved to lmd.path module. +# Imports from lmd.segmentation are deprecated and will be removed in v3.0.0 + +from lmd.path import ( + _get_closest as __get_closest, +) +from lmd.path import ( + _get_nodes as __get_nodes, +) +from lmd.path import ( + _tps_greedy_solve as __tps_greedy_solve, +) +from lmd.path import ( + assign_vertices as _assign_vertices, +) +from lmd.path import ( + calc_len as _calc_len, +) +from lmd.path import ( + tsp_greedy_solve as _tsp_greedy_solve, +) +from lmd.path import ( + tsp_hilbert_solve as _tsp_hilbert_solve, +) - print(f"{samples} nodes left") - # recursive abort - if samples == 1: - return data - import umap - - knn_index, knn_dist, _ = umap.umap_.nearest_neighbors( - data, n_neighbors=k, metric="euclidean", metric_kwds={}, angular=True, random_state=np.random.RandomState(42) +def calc_len(data): + """Deprecated: Use lmd.path.calc_len instead.""" + warnings.warn( + "calc_len has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import calc_len", + DeprecationWarning, + stacklevel=2, ) + return _calc_len(data) - knn_index = knn_index[:, 1:] - knn_dist = knn_dist[:, 1:] - - # follow greedy knn as long as a nearest neighbour is found in the current tree - nodes = [] - current_node = 0 - while current_node is not None: - nodes.append(current_node) - # print(current_node, knn_index[current_node], next_node) - next_node = _get_closest(nodes, knn_index[current_node], samples) - - current_node = next_node - - # as soon as no nearest neigbour can be found, create a new list of all elements still remeining - # nodes: [0, 2, 5], nodes_left: [1, 3, 4, 6, 7, 8, 9] - # add the last node assigned as starting point to the new list - # nodes: [0, 2], nodes_left: [5, 1, 3, 4, 6, 7, 8, 9] - - nodes_left = list(set(range(samples)) - set(nodes)) - - # add last node from nodes to nodes_left - - nodes_left = [nodes.pop(-1)] + nodes_left - - node_data_left = data[nodes_left] - - # join lists - - return np.concatenate([data[nodes], _tps_greedy_solve(node_data_left, k=k)]) - - -# TODO: Add type hints -# TODO: Add docstrings -# calculate the index array for a sorted 2d list based on an unsorted list -@njit() -def _get_nodes(data, sorted_data): - indexed_data = list(enumerate(data)) - - nodes = [] - print("start sorting") - for element in sorted_data: - for j, tup in enumerate(indexed_data): - i, el = tup - - if np.array_equal(el, element): - nodes.append(i) - indexed_data.pop(j) - return nodes +def tsp_hilbert_solve(data, p=3): + """Deprecated: Use lmd.path.tsp_hilbert_solve instead.""" + warnings.warn( + "tsp_hilbert_solve has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import tsp_hilbert_solve", + DeprecationWarning, + stacklevel=2, + ) + return _tsp_hilbert_solve(data, p=p) -# TODO: Add type hints def tsp_greedy_solve(node_list, k=100, return_sorted=False): - """Find an approximation of the closest path through a list of coordinates - - Args: - node_list (np.array): Array of shape `(N, 2)` containing a list of coordinates - - k (int, default: 100): Number of Nearest Neighbours calculated for each Node. - - return_sorted: If set to False a list of indices is returned. If set to True the sorted coordinates are returned. - - """ - - sorted_nodes = _tps_greedy_solve(node_list) - - if return_sorted: - return sorted_nodes - - else: - nodes_order = _get_nodes(node_list, sorted_nodes) - return nodes_order + """Deprecated: Use lmd.path.tsp_greedy_solve instead.""" + warnings.warn( + "tsp_greedy_solve has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import tsp_greedy_solve", + DeprecationWarning, + stacklevel=2, + ) + return _tsp_greedy_solve(node_list, k=k, return_sorted=return_sorted) -@njit() def assign_vertices(hilbert_points, data_rounded): - data_rounded = data_rounded.astype(np.int64) - hilbert_points = hilbert_points.astype(np.int64) - - output_order = np.zeros(len(data_rounded)).astype(np.int64) - current_index = 0 - - for hilbert_point in hilbert_points: - for i, data_point in enumerate(data_rounded): - if np.array_equal(hilbert_point, data_point): - output_order[current_index] = i - current_index += 1 - - return output_order - - -def calc_len(data): - """calculate the length of a path based on a list of coordinates + """Deprecated: Use lmd.path.assign_vertices instead.""" + warnings.warn( + "assign_vertices has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import assign_vertices", + DeprecationWarning, + stacklevel=2, + ) + return _assign_vertices(hilbert_points, data_rounded) - Args: - data (np.array): Array of shape `(N, 2)` containing a list of coordinates - """ +def _get_closest(used, choices, world_size): + """Deprecated: Use lmd.path._get_closest instead.""" + warnings.warn( + "_get_closest has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import _get_closest", + DeprecationWarning, + stacklevel=2, + ) + return __get_closest(used, choices, world_size) - index = np.arange(len(data)).astype(int) - not_shifted = data[index[:-1]] - shifted = data[index[1:]] +def _get_nodes(data, sorted_data): + """Deprecated: Use lmd.path._get_nodes instead.""" + warnings.warn( + "_get_nodes has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import _get_nodes", + DeprecationWarning, + stacklevel=2, + ) + return __get_nodes(data, sorted_data) - diff = not_shifted - shifted - sq = np.square(diff) - dist = np.sum(np.sqrt(np.sum(sq, axis=1))) - return dist +def _tps_greedy_solve(data, k=100): + """Deprecated: Use lmd.path._tps_greedy_solve instead.""" + warnings.warn( + "_tps_greedy_solve has been moved to lmd.path. " + "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " + "Please use: from lmd.path import _tps_greedy_solve", + DeprecationWarning, + stacklevel=2, + ) + return __tps_greedy_solve(data, k=k) From 2109cfb1e7d6ffccaad24e440269b6bf4f5c08c8 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:33:11 +0100 Subject: [PATCH 36/44] Move imports to top of module --- src/lmd/segmentation.py | 56 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/lmd/segmentation.py b/src/lmd/segmentation.py index c89fd15..d567b28 100644 --- a/src/lmd/segmentation.py +++ b/src/lmd/segmentation.py @@ -6,6 +6,33 @@ from numba import njit, prange, types from scipy.sparse import coo_array +# ============================================================================= +# Deprecation Aliases - Path Optimization Functions +# ============================================================================= +# These functions have been moved to lmd.path module. +# Imports from lmd.segmentation are deprecated and will be removed in v3.0.0 +from lmd.path import ( + _get_closest as __get_closest, +) +from lmd.path import ( + _get_nodes as __get_nodes, +) +from lmd.path import ( + _tps_greedy_solve as __tps_greedy_solve, +) +from lmd.path import ( + assign_vertices as _assign_vertices, +) +from lmd.path import ( + calc_len as _calc_len, +) +from lmd.path import ( + tsp_greedy_solve as _tsp_greedy_solve, +) +from lmd.path import ( + tsp_hilbert_solve as _tsp_hilbert_solve, +) + # TODO: Rename index_list to index_dict to correctly represent type # TODO: Rename index_list to index_dict to correctly represent type @@ -154,35 +181,6 @@ def get_coordinate_form(classes, coords_lookup, debug=False): return center, length, coords_filtered -# ============================================================================= -# Deprecation Aliases - Path Optimization Functions -# ============================================================================= -# These functions have been moved to lmd.path module. -# Imports from lmd.segmentation are deprecated and will be removed in v3.0.0 - -from lmd.path import ( - _get_closest as __get_closest, -) -from lmd.path import ( - _get_nodes as __get_nodes, -) -from lmd.path import ( - _tps_greedy_solve as __tps_greedy_solve, -) -from lmd.path import ( - assign_vertices as _assign_vertices, -) -from lmd.path import ( - calc_len as _calc_len, -) -from lmd.path import ( - tsp_greedy_solve as _tsp_greedy_solve, -) -from lmd.path import ( - tsp_hilbert_solve as _tsp_hilbert_solve, -) - - def calc_len(data): """Deprecated: Use lmd.path.calc_len instead.""" warnings.warn( From fbd1a599639c8e1a6582b904c3465d53501d235a Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:40:38 +0100 Subject: [PATCH 37/44] [Test] Move unittests to test_path.py and validate that they are equivalent --- tests/unit/test_lmd/test_path.py | 170 +++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 tests/unit/test_lmd/test_path.py diff --git a/tests/unit/test_lmd/test_path.py b/tests/unit/test_lmd/test_path.py new file mode 100644 index 0000000..4c0e3c0 --- /dev/null +++ b/tests/unit/test_lmd/test_path.py @@ -0,0 +1,170 @@ +"""Tests for lmd.path module - path optimization functions.""" + +from typing import Optional + +import numpy as np +import pytest + +from lmd.path import ( + _get_closest, + _get_nodes, + assign_vertices, + calc_len, + tsp_hilbert_solve, +) + +ATOL = 1e-10 + + +@pytest.mark.parametrize( + ("data", "p"), + [ + # Two points - should return valid ordering + (np.array([[0.0, 0.0], [1.0, 1.0]]), 3), + # Four corner points + (np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), 3), + # Linear points + (np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]), 3), + # More points with different p value + (np.array([[0.0, 0.0], [0.25, 0.25], [0.5, 0.5], [0.75, 0.75], [1.0, 1.0]]), 4), + ], + ids=("two_points", "four_corners", "linear_points", "five_points_p4"), +) +def test_tsp_hilbert_solve(data: np.ndarray, p: int) -> None: + """Test `tsp_hilbert_solve` returns valid ordering""" + # Act + result = tsp_hilbert_solve(data=data, p=p) + + # Assert - result should be a permutation of indices + assert len(result) == len(data) + assert set(result) == set(range(len(data))) + + +@pytest.mark.parametrize( + ("used", "choices", "world_size", "expected_result"), + [ + # First element not in used is returned + ([], [0, 1, 2], 10, 0), + # First element is used, return second + ([0], [0, 1, 2], 10, 1), + # First two elements used, return third + ([0, 1], [0, 1, 2], 10, 2), + # All elements are used, return None + ([0, 1, 2], [0, 1, 2], 10, None), + # First available element is -1, return None + ([0], [0, -1, 2], 10, None), + # -1 at start and not in used returns None + ([], [-1, 1, 2], 10, None), + # Skip used elements to find first available + ([0, 2], [0, 2, 1, 3], 10, 1), + # Empty choices returns None + ([0], [], 10, None), + ], + ids=( + "first_available", + "skip_one_used", + "skip_two_used", + "all_used", + "minus_one_element", + "minus_one_first", + "non_sequential_choices", + "empty_choices", + ), +) +def test__get_closest(used: list[int], choices: list[int], world_size: int, expected_result: Optional[int]) -> None: + """Test `_get_closest` for finding first unused element from choices""" + # Act + result = _get_closest(used=used, choices=choices, world_size=world_size) + + # Assert + assert result == expected_result + + +# TODO: Rename test when fixing function name +# Note: _tps_greedy_solve and tsp_greedy_solve require umap dependency and are tested in integration tests + + +@pytest.mark.parametrize( + ("data", "sorted_data", "expected_nodes"), + [ + # Single element + (np.array([[0, 0]]), np.array([[0, 0]]), [0]), + # Already sorted - indices are 0, 1, 2 + ( + np.array([[0, 0], [1, 1], [2, 2]]), + np.array([[0, 0], [1, 1], [2, 2]]), + [0, 1, 2], + ), + # Reversed order - indices map sorted back to original positions + ( + np.array([[2, 2], [1, 1], [0, 0]]), + np.array([[0, 0], [1, 1], [2, 2]]), + [2, 1, 0], + ), + # Arbitrary reordering + ( + np.array([[1, 0], [0, 1], [2, 2], [0, 0]]), + np.array([[0, 0], [1, 0], [0, 1], [2, 2]]), + [3, 0, 1, 2], + ), + ], + ids=("single_element", "already_sorted", "reversed", "arbitrary_reorder"), +) +def test__get_nodes(data: np.ndarray, sorted_data: np.ndarray, expected_nodes: list[int]) -> None: + """Test `_get_nodes` for calculating index array mapping sorted to original data""" + result = _get_nodes(data=data, sorted_data=sorted_data) + + assert list(result) == expected_nodes + + +@pytest.mark.parametrize( + ("hilbert_points", "data_rounded", "expected_order"), + [ + # Single point + (np.array([[0, 0]]), np.array([[0, 0]]), np.array([0])), + # Two points in hilbert order + (np.array([[0, 0], [1, 0]]), np.array([[0, 0], [1, 0]]), np.array([0, 1])), + # Two points in reverse hilbert order - order reflects hilbert traversal + (np.array([[0, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), np.array([1, 0])), + # Three points with reordering + ( + np.array([[0, 0], [0, 1], [1, 1]]), + np.array([[1, 1], [0, 0], [0, 1]]), + np.array([1, 2, 0]), + ), + # Points not all on hilbert curve - only matching points are ordered + ( + np.array([[0, 0], [1, 0], [2, 0]]), + np.array([[0, 0], [2, 0]]), + np.array([0, 1]), + ), + ], + ids=( + "single_point", + "two_points_in_order", + "two_points_reversed", + "three_points_reorder", + "partial_match", + ), +) +def test_assign_vertices(hilbert_points: np.ndarray, data_rounded: np.ndarray, expected_order: np.ndarray) -> None: + """Test `assign_vertices` for ordering data points along hilbert curve""" + result = assign_vertices(hilbert_points=hilbert_points, data_rounded=data_rounded) + + assert np.array_equal(result[: len(expected_order)], expected_order) + + +@pytest.mark.parametrize( + ("array", "expected_result"), + [ + (np.array([[0], [1], [2]]), 2), + (np.array([[0, 0], [1, 0], [2, 0]]), 2), + (np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]), 4), + ], + ids=("1d_like", "linear", "square_circular_path"), +) +def test_calc_len(array: np.ndarray, expected_result: float) -> None: + """Test `calc_len` for the computation of a path length""" + result = calc_len(array) + + assert result == expected_result From c44bd237b09313279580ffbf3482ade4d545eba5 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:41:37 +0100 Subject: [PATCH 38/44] [Test] Add tests for deprecation warnings --- tests/unit/test_lmd/test_segmentation.py | 225 +++++++++-------------- 1 file changed, 82 insertions(+), 143 deletions(-) diff --git a/tests/unit/test_lmd/test_segmentation.py b/tests/unit/test_lmd/test_segmentation.py index 14bc766..3f5d16c 100644 --- a/tests/unit/test_lmd/test_segmentation.py +++ b/tests/unit/test_lmd/test_segmentation.py @@ -1,4 +1,4 @@ -from typing import Optional +import warnings import numpy as np import pytest @@ -7,13 +7,8 @@ _create_coord_index, _create_coord_index_sparse, _filter_coord_index, - _get_closest, - _get_nodes, _numba_accelerator_coord_calculation, - assign_vertices, - calc_len, get_coordinate_form, - tsp_hilbert_solve, ) ATOL = 1e-10 @@ -251,155 +246,99 @@ def test_get_coordinate_form( assert np.allclose(cf, ecf, atol=ATOL) -@pytest.mark.parametrize( - ("data", "p"), - [ - # Two points - should return valid ordering - (np.array([[0.0, 0.0], [1.0, 1.0]]), 3), - # Four corner points - (np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]), 3), - # Linear points - (np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]), 3), - # More points with different p value - (np.array([[0.0, 0.0], [0.25, 0.25], [0.5, 0.5], [0.75, 0.75], [1.0, 1.0]]), 4), - ], - ids=("two_points", "four_corners", "linear_points", "five_points_p4"), -) -def test_tsp_hilbert_solve(data: np.ndarray, p: int) -> None: - """Test `tsp_hilbert_solve` returns valid ordering""" - # Act - result = tsp_hilbert_solve(data=data, p=p) +# ============================================================================= +# Deprecation Warning Tests +# ============================================================================= +# These tests verify that importing path functions from lmd.segmentation +# raises deprecation warnings. The actual function tests are in test_path.py. - # Assert - result should be a permutation of indices - assert len(result) == len(data) - assert set(result) == set(range(len(data))) +class TestDeprecationWarnings: + """Test that deprecated imports from lmd.segmentation raise DeprecationWarning.""" -@pytest.mark.parametrize( - ("used", "choices", "world_size", "expected_result"), - [ - # First element not in used is returned - ([], [0, 1, 2], 10, 0), - # First element is used, return second - ([0], [0, 1, 2], 10, 1), - # First two elements used, return third - ([0, 1], [0, 1, 2], 10, 2), - # All elements are used, return None - ([0, 1, 2], [0, 1, 2], 10, None), - # First available element is -1, return None - ([0], [0, -1, 2], 10, None), - # -1 at start and not in used returns None - ([], [-1, 1, 2], 10, None), - # Skip used elements to find first available - ([0, 2], [0, 2, 1, 3], 10, 1), - # Empty choices returns None - ([0], [], 10, None), - ], - ids=( - "first_available", - "skip_one_used", - "skip_two_used", - "all_used", - "minus_one_element", - "minus_one_first", - "non_sequential_choices", - "empty_choices", - ), -) -def test__get_closest(used: list[int], choices: list[int], world_size: int, expected_result: Optional[int]) -> None: - """Test `_get_closest` for finding first unused element from choices""" - # Act - result = _get_closest(used=used, choices=choices, world_size=world_size) + def test_calc_len_deprecation_warning(self) -> None: + """Test that calc_len raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import calc_len - # Assert - assert result == expected_result + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + calc_len(np.array([[0, 0], [1, 1]])) + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "calc_len has been moved to lmd.path" in str(w[0].message) -# TODO: Rename test when fixing function name -# Note: _tps_greedy_solve and tsp_greedy_solve require umap dependency and are tested in integration tests + def test_tsp_hilbert_solve_deprecation_warning(self) -> None: + """Test that tsp_hilbert_solve raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import tsp_hilbert_solve + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + tsp_hilbert_solve(np.array([[0.0, 0.0], [1.0, 1.0]]), p=3) -@pytest.mark.parametrize( - ("data", "sorted_data", "expected_nodes"), - [ - # Single element - (np.array([[0, 0]]), np.array([[0, 0]]), [0]), - # Already sorted - indices are 0, 1, 2 - ( - np.array([[0, 0], [1, 1], [2, 2]]), - np.array([[0, 0], [1, 1], [2, 2]]), - [0, 1, 2], - ), - # Reversed order - indices map sorted back to original positions - ( - np.array([[2, 2], [1, 1], [0, 0]]), - np.array([[0, 0], [1, 1], [2, 2]]), - [2, 1, 0], - ), - # Arbitrary reordering - ( - np.array([[1, 0], [0, 1], [2, 2], [0, 0]]), - np.array([[0, 0], [1, 0], [0, 1], [2, 2]]), - [3, 0, 1, 2], - ), - ], - ids=("single_element", "already_sorted", "reversed", "arbitrary_reorder"), -) -def test__get_nodes(data: np.ndarray, sorted_data: np.ndarray, expected_nodes: list[int]) -> None: - """Test `_get_nodes` for calculating index array mapping sorted to original data""" - result = _get_nodes(data=data, sorted_data=sorted_data) + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "tsp_hilbert_solve has been moved to lmd.path" in str(w[0].message) - assert list(result) == expected_nodes + def test_tsp_greedy_solve_deprecation_warning(self) -> None: + """Test that tsp_greedy_solve raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import tsp_greedy_solve + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + # Note: tsp_greedy_solve requires umap, so we just check the warning is raised + # before the actual call would happen + tsp_greedy_solve(np.array([[0.0, 0.0], [1.0, 1.0]])) -@pytest.mark.parametrize( - ("hilbert_points", "data_rounded", "expected_order"), - [ - # Single point - (np.array([[0, 0]]), np.array([[0, 0]]), np.array([0])), - # Two points in hilbert order - (np.array([[0, 0], [1, 0]]), np.array([[0, 0], [1, 0]]), np.array([0, 1])), - # Two points in reverse hilbert order - order reflects hilbert traversal - (np.array([[0, 0], [1, 0]]), np.array([[1, 0], [0, 0]]), np.array([1, 0])), - # Three points with reordering - ( - np.array([[0, 0], [0, 1], [1, 1]]), - np.array([[1, 1], [0, 0], [0, 1]]), - np.array([1, 2, 0]), - ), - # Points not all on hilbert curve - only matching points are ordered - ( - np.array([[0, 0], [1, 0], [2, 0]]), - np.array([[0, 0], [2, 0]]), - np.array([0, 1]), - ), - ], - ids=( - "single_point", - "two_points_in_order", - "two_points_reversed", - "three_points_reorder", - "partial_match", - ), -) -def test_assign_vertices(hilbert_points: np.ndarray, data_rounded: np.ndarray, expected_order: np.ndarray) -> None: - """Test `assign_vertices` for ordering data points along hilbert curve""" - result = assign_vertices(hilbert_points=hilbert_points, data_rounded=data_rounded) + assert len(w) >= 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "tsp_greedy_solve has been moved to lmd.path" in str(w[0].message) - assert np.array_equal(result[: len(expected_order)], expected_order) + def test_assign_vertices_deprecation_warning(self) -> None: + """Test that assign_vertices raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import assign_vertices + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assign_vertices(np.array([[0, 0]]), np.array([[0, 0]])) -@pytest.mark.parametrize( - ("array", "expected_result"), - [ - (np.array([[0], [1], [2]]), 2), - (np.array([[0, 0], [1, 0], [2, 0]]), 2), - (np.array([[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]), 4), - ], - ids=("1d_like", "linear", "square_circular_path"), -) -def test_calc_len(array: np.ndarray, expected_result: float) -> None: - """Test `calc_len` for the computation of a path length""" - result = calc_len(array) + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "assign_vertices has been moved to lmd.path" in str(w[0].message) + + def test__get_closest_deprecation_warning(self) -> None: + """Test that _get_closest raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import _get_closest + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _get_closest([], [0, 1, 2], 10) + + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "_get_closest has been moved to lmd.path" in str(w[0].message) + + def test__get_nodes_deprecation_warning(self) -> None: + """Test that _get_nodes raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import _get_nodes + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + _get_nodes(np.array([[0, 0]]), np.array([[0, 0]])) + + assert len(w) == 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "_get_nodes has been moved to lmd.path" in str(w[0].message) + + def test__tps_greedy_solve_deprecation_warning(self) -> None: + """Test that _tps_greedy_solve raises DeprecationWarning when imported from segmentation.""" + from lmd.segmentation import _tps_greedy_solve + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + _tps_greedy_solve(np.array([[0.0, 0.0], [1.0, 1.0]])) - assert result == expected_result + assert len(w) >= 1 + assert issubclass(w[0].category, DeprecationWarning) + assert "_tps_greedy_solve has been moved to lmd.path" in str(w[0].message) From 2768ca4594ec63804626a2d5fa38711c9fdc6fb8 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 00:52:38 +0100 Subject: [PATCH 39/44] [Refactor] Fix typo. Rename _tps_greedy_solve to _tsp_greedy_solve and update deprecation warning --- src/lmd/path.py | 7 +++---- src/lmd/segmentation.py | 10 +++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/lmd/path.py b/src/lmd/path.py index bbd8d0b..38f98e2 100644 --- a/src/lmd/path.py +++ b/src/lmd/path.py @@ -94,10 +94,9 @@ def _get_closest(used, choices, world_size): # all choices have been taken, return closest free index due to local optimality -# TODO: Rename to TSP (traveling sales person) # TODO: Add type hints # TODO: Add umap as optional dependency -def _tps_greedy_solve(data, k=100): +def _tsp_greedy_solve(data, k=100): samples = len(data) print(f"{samples} nodes left") @@ -139,7 +138,7 @@ def _tps_greedy_solve(data, k=100): # join lists - return np.concatenate([data[nodes], _tps_greedy_solve(node_data_left, k=k)]) + return np.concatenate([data[nodes], _tsp_greedy_solve(node_data_left, k=k)]) # TODO: Add type hints @@ -175,7 +174,7 @@ def tsp_greedy_solve(node_list, k=100, return_sorted=False): """ - sorted_nodes = _tps_greedy_solve(node_list) + sorted_nodes = _tsp_greedy_solve(node_list) if return_sorted: return sorted_nodes diff --git a/src/lmd/segmentation.py b/src/lmd/segmentation.py index d567b28..6ba3e97 100644 --- a/src/lmd/segmentation.py +++ b/src/lmd/segmentation.py @@ -18,7 +18,7 @@ _get_nodes as __get_nodes, ) from lmd.path import ( - _tps_greedy_solve as __tps_greedy_solve, + _tsp_greedy_solve as __tsp_greedy_solve, ) from lmd.path import ( assign_vertices as _assign_vertices, @@ -254,12 +254,12 @@ def _get_nodes(data, sorted_data): def _tps_greedy_solve(data, k=100): - """Deprecated: Use lmd.path._tps_greedy_solve instead.""" + """Deprecated: Use lmd.path._tsp_greedy_solve instead.""" warnings.warn( - "_tps_greedy_solve has been moved to lmd.path. " + "_tps_greedy_solve has been moved to lmd.path and renamed to _tsp_greedy_solve. " "Import from lmd.segmentation is deprecated and will be removed in v3.0.0. " - "Please use: from lmd.path import _tps_greedy_solve", + "Please use: from lmd.path import _tsp_greedy_solve", DeprecationWarning, stacklevel=2, ) - return __tps_greedy_solve(data, k=k) + return __tsp_greedy_solve(data, k=k) From bcfc43326b04951e8bad0575db0219b033f7baf7 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:00:04 +0100 Subject: [PATCH 40/44] Add optional umap dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ef14f37..b31372d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ classifiers = [ dependencies = {file = ["requirements.txt"]} [tool.setuptools.dynamic.optional-dependencies] +umap = {"umap-learn"} dev_only = {file = ["requirements_dev.txt"]} doc = {file = ["requirements_doc.txt"]} test = {file = ["requirements_test.txt"]} From e6cd5b8f5cda3c7eb80c991ec0783f396bc33771 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:01:42 +0100 Subject: [PATCH 41/44] [Dep] Add umap as optional dependency --- pyproject.toml | 2 +- requirements_umap.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100755 requirements_umap.txt diff --git a/pyproject.toml b/pyproject.toml index b31372d..42780e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ dependencies = {file = ["requirements.txt"]} [tool.setuptools.dynamic.optional-dependencies] -umap = {"umap-learn"} +umap = {file = ["requirements_umap.txt"]} dev_only = {file = ["requirements_dev.txt"]} doc = {file = ["requirements_doc.txt"]} test = {file = ["requirements_test.txt"]} diff --git a/requirements_umap.txt b/requirements_umap.txt new file mode 100755 index 0000000..51f1982 --- /dev/null +++ b/requirements_umap.txt @@ -0,0 +1,2 @@ +pre-commit +pytest From 87be26ce7efce114dc472777646c363f0e2320a0 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:03:36 +0100 Subject: [PATCH 42/44] Install optional umap dependency for CI/CD tests --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 4da6e90..6cd4ba7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,7 +27,7 @@ jobs: - name: Install package run: | pip install uv - uv pip install --system ".[tests, dev]" + uv pip install --system ".[tests, dev, umap]" - name: Run pre-commit checks run: | pre-commit install From 90ab2e632e12d002304b6d0ca69027c956e6cba7 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:04:36 +0100 Subject: [PATCH 43/44] [Dep] Fix requirements file and specify umap --- requirements_umap.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements_umap.txt b/requirements_umap.txt index 51f1982..cfb6acd 100755 --- a/requirements_umap.txt +++ b/requirements_umap.txt @@ -1,2 +1 @@ -pre-commit -pytest +umap-learn From a42e19791bdcd5720aec564dca3902941657a038 Mon Sep 17 00:00:00 2001 From: Lucas Diedrich Date: Thu, 15 Jan 2026 01:05:09 +0100 Subject: [PATCH 44/44] [feature] Add ImportError when umap is not installed --- src/lmd/path.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lmd/path.py b/src/lmd/path.py index 38f98e2..a1536d5 100644 --- a/src/lmd/path.py +++ b/src/lmd/path.py @@ -104,7 +104,12 @@ def _tsp_greedy_solve(data, k=100): if samples == 1: return data - import umap + try: + import umap + except ImportError: + raise ImportError( + "umap-learn is required for this function. " "Install it with: pip install py-lmd[umap]" + ) from None knn_index, knn_dist, _ = umap.umap_.nearest_neighbors( data, n_neighbors=k, metric="euclidean", metric_kwds={}, angular=True, random_state=np.random.RandomState(42)