openproblems-bio · JGarnica22 · Apr 15, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # task_batch_integration devel
 
+## New functionality
+* Add new `metrics/cilisi` metric component.
+    - CiLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
+        the scores between 0 and 1. Unlike iLISI, CiLISI preserves sensitivity to biological variance and avoids favoring
+        overcorrected datasets in which cell type signals have been removed.
+        We propose this metric as a replacement for iLISI.
+
 ## New functionality
 
 * Added `metrics/kbet_pg` and `metrics/kbet_pg_label` components (PR #52).

diff --git a/src/metrics/cilisi/config.vsh.yaml b/src/metrics/cilisi/config.vsh.yaml
@@ -0,0 +1,105 @@
+# The API specifies which type of component this is.
+# It contains specifications for:
+#   - The input/output files
+#   - Common parameters
+#   - A unit test
+__merge__: ../../api/comp_metric.yaml
+
+# A unique identifier for your component (required).
+# Can contain only lowercase letters or underscores.
+name: cilisi
+
+
+
+# Metadata for your component
+info:
+  metrics:
+      # A unique identifier for your metric (required).
+      # Can contain only lowercase letters or underscores.
+    - name: cilisi
+      # A relatively short label, used when rendering visualisations (required)
+      label: CiLISI
+      # A one sentence summary of how this metric works (required). Used when 
+      # rendering summary tables.
+      summary: Cell-type aware version of iLISI (integration Local Inverse Simpson's Index).
+                iLISI is computed separately for each cell type or cluster, normalized between 0 and 1, and averaged across all cells (global mean).
+                CiLISI is calculated only for groups with at least 10 cells and 2 distinct batch labels.
+      # A multi-line description of how this component works (required). Used
+      # when rendering reference documentation.
+      description: |
+        ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
+        the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring
+        overcorrected datasets with removed cell type signals.
+      references:
+        doi: 10.1038/s41467-024-45240-z
+      links:
+        # URL to the documentation for this metric (required).
+        documentation: https://github.com/carmonalab/scIntegrationMetrics
+        # URL to the code repository for this metric (required).
+        repository: https://github.com/carmonalab/scIntegrationMetrics
+      # The minimum possible value for this metric (required)
+      min: 0
+      # The maximum possible value for this metric (required)
+      max: 1
+      # Whether a higher value represents a 'better' solution (required)
+      maximize: true
+
+    - name: cilisi_means
+      # A relatively short label, used when rendering visualisations (required)
+      label: CiLISI_means
+      # A one sentence summary of how this metric works (required). Used when 
+      # rendering summary tables.
+      summary: As CiLISI, but returns the mean of the per-group CiLISI means instead of a global average over all cells.
+      # A multi-line description of how this component works (required). Used
+      # when rendering reference documentation.
+      description: |
+        ciLISI measures batch mixing in a cell type-aware manner by computing iLISI within each cell type and normalizing
+        the scores between 0 and 1. Unlike iLISI, ciLISI preserves sensitivity to biological variance and avoids favoring
+        overcorrected datasets with removed cell type signals.
+      references:
+        doi: 10.1038/s41467-024-45240-z
+      links:
+        # URL to the documentation for this metric (required).
+        documentation: https://github.com/carmonalab/scIntegrationMetrics
+        # URL to the code repository for this metric (required).
+        repository: https://github.com/carmonalab/scIntegrationMetrics
+      # The minimum possible value for this metric (required)
+      min: 0
+      # The maximum possible value for this metric (required)
+      max: 1
+      # Whether a higher value represents a 'better' solution (required)
+      maximize: true
+
+# Component-specific parameters (optional)
+# arguments:
+#   - name: "--n_neighbors"
+#     type: "integer"
+#     default: 5
+#     description: Number of neighbors to use.
+
+# Resources required to run the component
+resources:
+  # The script of your component (required)
+  - type: r_script
+    path: script.R
+  # Additional resources your script needs (optional)
+  # - type: file
+  #   path: weights.pt
+
+engines:
+  # Specifications for the Docker image for this component.
+  - type: docker
+    image: openproblems/base_r:1.0.0
+    # Add custom dependencies here (optional). For more information, see
+    # https://viash.io/reference/config/engines/docker/#setup .
+    setup:
+       - type: r
+         github: https://github.com/carmonalab/[email protected]  # NOTE(review): "[email protected]" looks like an obfuscated "<repo>@<ref>"; confirm the intended ref
+
+runners:
+  # This platform allows running the component natively
+  - type: executable
+  # Allows turning the component into a Nextflow module / pipeline.
+  - type: nextflow
+    directives:
+      label: [midtime,midmem,midcpu]
diff --git a/src/metrics/cilisi/script.R b/src/metrics/cilisi/script.R
@@ -0,0 +1,80 @@
+library(anndata)
+library(scIntegrationMetrics)
+
+## VIASH START
+par <- list(
+  input_integrated = "resources_test/task_batch_integration/cxg_immune_cell_atlas/integrated_processed.h5ad",
+  input_solution = "resources_test/task_batch_integration/cxg_immune_cell_atlas/solution.h5ad",
+  output = "output.h5ad"
+)
+meta <- list(
+  name = "cilisi"
+)
+## VIASH END
+
+# CiLISI metric component.
+# Reads an integrated embedding and the solution annotations, scores batch
+# mixing with scIntegrationMetrics::compute_lisi_splitBy() (label "batch",
+# split by "cell_type"), and writes two summary values to an AnnData file:
+#   - cilisi:       mean over every score returned for every group
+#   - cilisi_means: mean of the per-group means
+
+cat("Reading input files\n")
+adata <- anndata::read_h5ad(par[["input_integrated"]])
+solution <- anndata::read_h5ad(par[["input_solution"]])
+embeddings <- adata$obsm[["X_emb"]]
+metadata <- solution$obs
+
+# Fail early with a clear message rather than an obscure error downstream.
+if (is.null(embeddings)) {
+  stop("Integrated input has no 'X_emb' entry in obsm.", call. = FALSE)
+}
+missing_cols <- setdiff(c("batch", "cell_type"), colnames(metadata))
+if (length(missing_cols) > 0) {
+  stop(
+    "Solution obs is missing required column(s): ",
+    paste(missing_cols, collapse = ", "),
+    call. = FALSE
+  )
+}
+# The embedding and the metadata are passed separately, so they presumably
+# have to describe the same cells row by row; at minimum the counts must agree.
+if (nrow(embeddings) != nrow(metadata)) {
+  stop(
+    "Embedding has ", nrow(embeddings), " rows but solution obs has ",
+    nrow(metadata), ".",
+    call. = FALSE
+  )
+}
+
+cat("Compute CiLISI metrics...\n")
+lisisplit <- scIntegrationMetrics::compute_lisi_splitBy(
+  X = embeddings,
+  meta_data = metadata,
+  label_colnames = "batch",
+  perplexity = 30,
+  split_by_colname = "cell_type",
+  normalize = TRUE,
+  min.cells.split = 10,
+  min.vars.label = 2
+)
+# Global average: mean over all values in all groups.
+cilisi <- mean(unlist(lisisplit))
+# Average of the per-group means (first column of each group's result).
+# vapply() always yields a numeric vector, whereas sapply() returns a list
+# when `lisisplit` is empty.
+cilisi_means <- mean(vapply(lisisplit, function(x) mean(x[, 1]), numeric(1)))
+
+cat("Write output AnnData to file\n")
+output <- anndata::AnnData(
+  # Dimensions are counts, so spell them as integer literals.
+  shape = c(1L, 2L),
+  uns = list(
+    dataset_id = adata$uns[["dataset_id"]],
+    normalization_id = adata$uns[["normalization_id"]],
+    method_id = adata$uns[["method_id"]],
+    metric_ids = c("cilisi", "cilisi_means"),
+    metric_values = list(cilisi, cilisi_means)
+  )
+)
+output$write_h5ad(par[["output"]], compression = "gzip")