diff --git a/_viash.yaml b/_viash.yaml index ad91fd543..f6472c608 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -11,9 +11,9 @@ keywords: [openproblems, benchmarking, single-cell omics] references: doi: # Malte Luecken, Scott Gigante, Daniel Burkhardt, Robrecht Cannoodt, et al. - # Defining and benchmarking open problems in single-cell analysis, - # 03 April 2024, PREPRINT (Version 1) available at Research Square [https://doi.org/10.21203/rs.3.rs-4181617/v1] - - 10.21203/rs.3.rs-4181617/v1 + # Defining and benchmarking open problems in single-cell analysis. + # Nat Biotechnol 43, 1035–1040 (2025). + - 10.1038/s41587-025-02694-w links: issue_tracker: https://github.com/openproblems-bio/openproblems/issues diff --git a/common b/common index f01ff2170..0effaf2ad 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit f01ff2170161295e89014ee5453c61b29b4e4e77 +Subproject commit 0effaf2addbb8df6c0d11caae04d0ca63aa6345d diff --git a/scripts/create_resources/reprocess_task_results_v4.sh b/scripts/create_resources/reprocess_task_results_v4.sh new file mode 100755 index 000000000..dd6171da0 --- /dev/null +++ b/scripts/create_resources/reprocess_task_results_v4.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +OUT_DIR="resources" + +echo ">>> Fetching raw results..." +aws s3 sync --profile op \ + s3://openproblems-data/resources/ \ + "$OUT_DIR/" \ + --exclude "*" \ + --include "**/results/run_*/*" \ + --delete + +echo ">>> Patch state.yaml files..." +# fix state.yaml id and output_trace +python <>> Creating params.yaml..." +cat > /tmp/params.yaml << HERE +input_states: resources/*/results/run_*/state.yaml +rename_keys: 'input_task_info:output_task_info;input_dataset_info:output_dataset_info;input_method_configs:output_method_configs;input_metric_configs:output_metric_configs;input_scores:output_scores;input_trace:output_trace' +output_state: '\$id/state.yaml' +settings: '{"output_combined": "\$id/output_combined.json", "output_report": "\$id/output_report.html", "output_task_info": "\$id/output_task_info.json", "output_dataset_info": "\$id/output_dataset_info.json", "output_method_info": "\$id/output_method_info.json", "output_metric_info": "\$id/output_metric_info.json", "output_results": "\$id/output_results.json", "output_scores": "\$id/output_quality_control.json"}' +publish_dir: "$OUT_DIR" +HERE + +echo ">>> Processing results..." +nextflow run target/nextflow/reporting/process_task_results/main.nf \ + -profile docker \ + -params-file /tmp/params.yaml \ + -c common/nextflow_helpers/labels_ci.config \ + -entry auto \ + -resume + +# find all files in $OUT with the pattern output_report.html +echo ">>> List reports..." +find "$OUT_DIR" -name "output_report.html" + +# echo ">>> Uploading processed results to S3..." +# aws s3 sync --profile op \ +# "resources_test/openproblems/task_results_v4/" \ +# "s3://openproblems-data/resources_test/openproblems/task_results_v4/" \ +# --delete --dryrun + +# echo +# echo ">>> Done!" 
diff --git a/scripts/create_resources/task_results_v4.sh b/scripts/create_resources/task_results_v4.sh new file mode 100755 index 000000000..8bcb0220a --- /dev/null +++ b/scripts/create_resources/task_results_v4.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +set -e + +OUT_DIR="resources_test/openproblems/task_results_v4" + +echo ">>> Fetching raw results..." +aws s3 sync --profile op \ + s3://openproblems-data/resources/task_batch_integration/results/run_2025-01-23_18-03-16/ \ + "$OUT_DIR/raw/" \ + --delete + +echo +echo ">>> Processing results..." +if [ -d "$OUT_DIR/processed" ]; then rm -Rf $OUT_DIR/processed; fi +nextflow run target/nextflow/reporting/process_task_results/main.nf \ + -profile docker \ + --input_task_info $OUT_DIR/raw/task_info.yaml \ + --input_dataset_info $OUT_DIR/raw/dataset_uns.yaml \ + --input_method_configs $OUT_DIR/raw/method_configs.yaml \ + --input_metric_configs $OUT_DIR/raw/metric_configs.yaml \ + --input_scores $OUT_DIR/raw/score_uns.yaml \ + --input_trace $OUT_DIR/raw/trace.txt \ + --output_state state.yaml \ + --publishDir $OUT_DIR/processed + +echo ">>> Uploading processed results to S3..." +aws s3 sync --profile op \ + "resources_test/openproblems/task_results_v4/" \ + "s3://openproblems-data/resources_test/openproblems/task_results_v4/" \ + --delete --dryrun + +echo +echo ">>> Done!" diff --git a/src/reporting/combine_output/config.vsh.yaml b/src/reporting/combine_output/config.vsh.yaml new file mode 100644 index 000000000..87673b9e0 --- /dev/null +++ b/src/reporting/combine_output/config.vsh.yaml @@ -0,0 +1,102 @@ +name: combine_output +namespace: reporting +description: Combine task outputs into a single JSON + +argument_groups: + - name: Inputs + arguments: + - name: --input_task_info + type: file + description: Task info file + info: + format: + type: json + schema: /common/schemas/results_v4/task_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/task_info.json + - name: --input_dataset_info + type: file + description: Dataset info file + info: + format: + type: json + schema: /common/schemas/results_v4/dataset_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/dataset_info.json + - name: --input_method_info + type: file + description: Method info file + info: + format: + type: json + schema: /common/schemas/results_v4/method_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/method_info.json + - name: --input_metric_info + type: file + description: Metric info file + info: + format: + type: json + schema: /common/schemas/results_v4/metric_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/metric_info.json + - name: --input_results + type: file + description: Results file + info: + format: + type: json + schema: /common/schemas/results_v4/results.json + required: true + example: resources_test/openproblems/task_results_v4/processed/results.json + - name: --input_quality_control + type: file + description: Quality control file + info: + format: + type: json + schema: /common/schemas/results_v4/quality_control.json + required: true + example: resources_test/openproblems/task_results_v4/processed/quality_control.json + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: Combined output JSON 
+ default: combined_output.json + info: + format: + type: json + schema: /common/schemas/results_v4/combined_output.json + +resources: + - type: r_script + path: script.R + - path: /common/schemas + dest: schemas + +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + +engines: + - type: docker + image: openproblems/base_r:1 + setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli + +runners: + - type: executable + - type: nextflow + directives: + label: [lowmem, lowtime, lowcpu] diff --git a/src/reporting/combine_output/script.R b/src/reporting/combine_output/script.R new file mode 100644 index 000000000..32734c3c8 --- /dev/null +++ b/src/reporting/combine_output/script.R @@ -0,0 +1,105 @@ +## VIASH START +processed_dir <- "resources_test/openproblems/task_results_v4/processed" + +par <- list( + # Inputs + input_task_info = paste0(processed_dir, "/task_info.json"), + input_quality_control = paste0(processed_dir, "/quality_control.json"), + input_metric_info = paste0(processed_dir, "/metric_info.json"), + input_method_info = paste0(processed_dir, "/method_info.json"), + input_dataset_info = paste0(processed_dir, "/dataset_info.json"), + input_results = paste0(processed_dir, "/results.json"), + # Outputs + output = "task_results.json" +) +## VIASH END + +################################################################################ +# MAIN SCRIPT +################################################################################ + +cat("====== Combine output ======\n") + +cat("\n>>> Reading input files...\n") +cat("Reading task info from '", par$input_task_info, "'...\n", sep = "") +task_info <- jsonlite::read_json(par$input_task_info) + +cat( + "Reading quality control from '", + par$input_quality_control, + "'...\n", + sep = "" +) +quality_control <- jsonlite::read_json(par$input_quality_control) + +cat("Reading metric info from '", par$input_metric_info, "'...\n", sep = "") +metric_info <- jsonlite::read_json(par$input_metric_info) + +cat("Reading method info from '", par$input_method_info, "'...\n", sep = "") +method_info <- jsonlite::read_json(par$input_method_info) + +cat("Reading dataset info from '", par$input_dataset_info, "'...\n", sep = "") +dataset_info <- jsonlite::read_json(par$input_dataset_info) + +cat("Reading results from '", par$input_results, "'...\n", sep = "") +results <- jsonlite::read_json(par$input_results) + +cat("\n>>> Combining outputs...\n") +# Create combined output according to task_results.json +combined_output <- list( + task_info = task_info, + dataset_info = dataset_info, + method_info = method_info, + metric_info = metric_info, + results = results, + quality_control = quality_control +) + +cat("\n>>> Writing output file...\n") +cat("Writing combined output to '", par$output, "'...\n", sep = "") +jsonlite::write_json( + combined_output, + par$output, + pretty = TRUE, + null = "null", + na = "null", + auto_unbox = TRUE +) + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "combined_output.json"), + "-r", + file.path(results_schemas, "task_info.json"), + "-r", + file.path(results_schemas, "dataset_info.json"), + "-r", + file.path(results_schemas, "method_info.json"), + "-r", + 
file.path(results_schemas, "metric_info.json"), + "-r", + file.path(results_schemas, "results.json"), + "-r", + file.path(results_schemas, "quality_control.json"), + "-r", + file.path(results_schemas, "core.json"), + "-d", + par$output +) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/generate_qc/config.vsh.yaml b/src/reporting/generate_qc/config.vsh.yaml index c141575cb..17818c14f 100644 --- a/src/reporting/generate_qc/config.vsh.yaml +++ b/src/reporting/generate_qc/config.vsh.yaml @@ -1,47 +1,98 @@ name: generate_qc namespace: reporting description: Generate task QC metrics -arguments: - - name: --task_info - type: file - description: Task info file - example: resources_test/openproblems/task_results_v3/processed/task_info.json - - name: --method_info - type: file - description: Method info file - example: resources_test/openproblems/task_results_v3/processed/method_info.json - - name: --metric_info - type: file - description: Metric info file - example: resources_test/openproblems/task_results_v3/processed/metric_info.json - - name: --dataset_info - type: file - description: Dataset info file - example: resources_test/openproblems/task_results_v3/processed/dataset_info.json - - name: --results - type: file - description: Results file - example: resources_test/openproblems/task_results_v3/processed/results.json - - name: --output - type: file - direction: output - default: output.json - description: Output json - info: - format: - type: json - # TODO: add schema + +argument_groups: + - name: Inputs + arguments: + - name: --input_task_info + type: file + description: Task info file + info: + format: + type: json + schema: /common/schemas/results_v4/task_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/task_info.json + - name: --input_dataset_info + type: file + description: Dataset info file + info: + format: + type: json + schema: /common/schemas/results_v4/dataset_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/dataset_info.json + - name: --input_method_info + type: file + description: Method info file + info: + format: + type: json + schema: /common/schemas/results_v4/method_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/method_info.json + - name: --input_metric_info + type: file + description: Metric info file + info: + format: + type: json + schema: /common/schemas/results_v4/metric_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/metric_info.json + - name: --input_results + type: file + description: Results JSON file + info: + format: + type: json + schema: /common/schemas/results_v4/results.json + required: true + example: resources_test/openproblems/task_results_v4/processed/results.json + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + default: quality_control.json + description: Output quality control JSON file + info: + format: + type: json + schema: /common/schemas/results_v4/quality_control.json + resources: - - type: python_script - path: script.py + - type: r_script + path: script.R + - path: /common/schemas + dest: schemas + test_resources: - type: python_script 
path: /common/component_tests/run_and_check_output.py - - path: /resources_test/openproblems/task_results_v3 - dest: resources_test/openproblems/task_results_v3 + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + engines: - type: docker - image: openproblems/base_python:1 + image: openproblems/base_r:1 + setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli + - type: r + cran: + - dplyr + - purrr + - readr + - stringr + - tidyr + runners: - type: executable - type: nextflow diff --git a/src/reporting/generate_qc/script.R b/src/reporting/generate_qc/script.R new file mode 100644 index 000000000..8725e546f --- /dev/null +++ b/src/reporting/generate_qc/script.R @@ -0,0 +1,816 @@ +## VIASH START +processed_dir <- "resources_test/openproblems/task_results_v4/processed" + +par <- list( + # Inputs + input_task_info = paste0(processed_dir, "/task_info.json"), + input_method_info = paste0(processed_dir, "/method_info.json"), + input_metric_info = paste0(processed_dir, "/metric_info.json"), + input_dataset_info = paste0(processed_dir, "/dataset_info.json"), + input_results = paste0(processed_dir, "/results.json"), + # Outputs + output = "quality_control.json" +) +## VIASH END + +################################################################################ +# FUNCTIONS +################################################################################ + +create_qc_entry <- function( + category, + label, + value, + severity_value, + condition, + message +) { + # If values are missing, set to -1 + # This can happen if a method/metric is not run and therefore has no results + if (is.null(value) || is.na(value) || length(value) == 0) { + value <- -1 + } + + if ( + is.null(severity_value) || + is.na(severity_value) || + length(severity_value) == 0 + ) { + severity_value <- -1 + } + + severity <- dplyr::case_when( + severity_value < 0 ~ 3L, + severity_value < 1 ~ 0L, + severity_value < 2 ~ 1L, + severity_value < 3 ~ 2L, + TRUE ~ 3L + ) + + list( + category = category, + label = label, + value = value, + severity = severity, + severity_value = severity_value, + condition = condition, + message = message + ) +} + +percent_missing <- function(items, field) { + is_missing <- purrr::map_lgl(items, \(.item) { + if (field == "references") { + return(references_missing(.item)) + } + + field_value <- .item[[field]] + is.null(field_value) || + is.na(field_value) || + (is.character(field_value) && field_value == "") + }) + + mean(is_missing) +} + +references_missing <- function(item) { + # Special case for control methods without references + if ("type" %in% names(item) && item$type == "control_method") { + return(FALSE) + } + + references <- item$references + if (length(references) == 0) { + return(TRUE) + } + + if ( + length(references) == 2 && all(c("doi", "bibtex") %in% names(references)) + ) { + if (length(references$doi) == 0 && length(references$bibtex) == 0) { + return(TRUE) + } + } + + return(FALSE) +} + +check_info_fields <- function(info, type, expected_fields, task_name) { + category <- paste(stringr::str_to_title(type), "info") + + purrr::map(expected_fields, function(.field) { + pct_missing <- percent_missing(info, .field) + create_qc_entry( + category = category, + label = paste0("Info field '", .field, "' % missing"), + value = pct_missing, + severity_value = ifelse(pct_missing > 0, 3.0, 0.0), + condition = "pct_missing <= 0", + message = paste0( + category, + " field '", + .field, + "' should be 
defined\n", + " Task: ", + task_name, + "\n", + " Field: ", + .field, + "\n", + " Percentage missing: ", + round(pct_missing * 100, 0) + ) + ) + }) +} + +check_missing_results <- function( + results_long, + name, + type, + n_datasets, + n_methods, + n_metrics, + task_name +) { + n_expected <- switch( + type, + "dataset" = n_methods * n_metrics, + "method" = n_datasets * n_metrics, + "metric" = n_datasets * n_methods + ) + + name_col <- paste0(type, "_name") + n_results <- results_long |> + dplyr::filter(!!rlang::sym(name_col) == name) |> + nrow() + pct_missing <- 1 - (n_results / n_expected) + + title <- type |> + stringr::str_replace_all("_", " ") |> + stringr::str_to_sentence() + + create_qc_entry( + category = "Raw results", + label = paste0(title, " '", name, "' % missing"), + value = pct_missing, + severity_value = pct_missing / 0.1, + condition = "pct_missing <= 0.1", + message = paste0( + "Percentage of missing results should be less than 10%\n", + " Task: ", + task_name, + "\n", + " ", + title, + ": ", + name, + "\n", + " Number of results: ", + n_results, + "\n", + " Expected number of results: ", + n_expected, + "\n", + " Percentage missing: ", + round(pct_missing * 100, 0), + "%\n" + ) + ) +} + +check_failed_processes <- function(results, name, type, task_name) { + name_col <- paste0(type, "_name") + results_name <- results |> + dplyr::filter(!!rlang::sym(name_col) == name) + + n_expected <- nrow(results_name) + n_succeeded <- sum(results_name$succeeded) + pct_failed <- 1 - (n_succeeded / n_expected) + + title <- type |> + stringr::str_replace_all("_", " ") |> + stringr::str_to_sentence() + + create_qc_entry( + category = "Raw results", + label = paste0(title, " '", name, "' % failed"), + value = pct_failed, + severity_value = pct_failed / 0.1, + condition = "pct_failed <= 0.1", + message = paste0( + "Percentage of failed processes should be less than 10%\n", + " Task: ", + task_name, + "\n", + " ", + title, + ": ", + name, + "\n", + " Succeeded processes: ", + n_succeeded, + "\n", + " Attempted processes: ", + n_expected, + "\n", + " Percentage failed: ", + round(pct_failed * 100, 0), + "%\n" + ) + ) +} + +check_metric_scaling <- function( + results_long, + metric, + control_methods, + task_name +) { + `%||%` <- rlang::`%||%` + + metric_results <- results_long |> + dplyr::filter(metric_name == metric) |> + dplyr::select(-metric_name) + + if ( + nrow(metric_results) == 0 || + !any(control_methods %in% metric_results$method_name) + ) { + return(list()) + } + + control_range <- metric_results |> + dplyr::filter( + method_name %in% control_methods + ) |> + dplyr::group_by(dataset_name) |> + dplyr::summarise( + control_min = min(metric_value), + control_max = max(metric_value) + ) + + scaled_metrics <- metric_results |> + dplyr::left_join(control_range, by = "dataset_name") |> + dplyr::mutate( + scaled_value = (metric_value - control_min) / (control_max - control_min), + outside = scaled_value < 0 | scaled_value > 1, + pct_outside = dplyr::case_when( + scaled_value < 0 ~ 0 - scaled_value, + scaled_value > 1 ~ scaled_value - 1, + TRUE ~ NA + ) + ) + + pct_outside <- sum(scaled_metrics$outside) / nrow(scaled_metrics) + worst_score <- min(scaled_metrics$scaled_value) + worst_pct_outside <- if (!is.na(worst_score) && worst_score < 0) { + max(scaled_metrics$pct_outside[scaled_metrics$scaled_value < 0]) + } else { + 0 + } + best_score <- max(scaled_metrics$scaled_value) + best_pct_outside <- if (!is.na(best_score) && best_score > 1) { + 
max(scaled_metrics$pct_outside[scaled_metrics$scaled_value > 1]) + } else { + 0 + } + + metric_checks <- list( + create_qc_entry( + category = "Scaling", + label = paste0("Metric '", metric, "' % outside range"), + value = pct_outside, + severity_value = pct_outside / 0.1, + condition = "pct_outside <= 0.1", + message = paste0( + "Percentage of scaled scores outside control range should be less than 10%\n", + " Task: ", + task_name, + "\n", + " Metric: ", + metric, + "\n", + " Inside range: ", + sum(!scaled_metrics$outside), + "\n", + " Scaled scores: ", + nrow(scaled_metrics), + "\n", + " Percentage outside: ", + round(pct_outside * 100, 0), + "%\n" + ) + ), + create_qc_entry( + category = "Scaling", + label = paste0("Metric '", metric, "' worst score % outside range"), + value = worst_pct_outside, + severity_value = worst_pct_outside / 0.1, + condition = "worst_pct_outside <= 0.1", + message = paste0( + "The worst scaled score should be less than 10% outside the control range\n", + " Task: ", + task_name, + "\n", + " Metric: ", + metric, + "\n", + " Worst score: ", + worst_score, + "\n", + " Percentage outside range: ", + round(worst_pct_outside * 100, 0), + "%\n" + ) + ), + create_qc_entry( + category = "Scaling", + label = paste0("Metric '", metric, "' best score % outside range"), + value = best_pct_outside, + severity_value = best_pct_outside / 0.1, + condition = "best_pct_outside <= 0.1", + message = paste0( + "The best scaled score should be less than 10% outside the control range\n", + " Task: ", + task_name, + "\n", + " Metric: ", + metric, + "\n", + " Best score: ", + best_score, + "\n", + " Percentage outside range: ", + round(best_pct_outside * 100, 0), + "%\n" + ) + ) + ) + + method_metric_checks <- purrr::map( + sort(unique(scaled_metrics$method_name)), + function(.method) { + check_method_metric_scaling(scaled_metrics, .method, task_name, metric) + } + ) |> + purrr::list_flatten() + + c(metric_checks, method_metric_checks) +} + +check_method_metric_scaling <- function( + scaled_metrics, + method, + task_name, + metric_name +) { + method_scaled_metrics <- scaled_metrics |> + dplyr::filter(method_name == method) + + worst_score <- min(method_scaled_metrics$scaled_value) + worst_pct_outside <- if (!is.na(worst_score) && worst_score < 0) { + max(method_scaled_metrics$pct_outside[ + method_scaled_metrics$scaled_value < 0 + ]) + } else { + 0 + } + best_score <- max(method_scaled_metrics$scaled_value) + best_pct_outside <- if (!is.na(best_score) && best_score > 1) { + max(method_scaled_metrics$pct_outside[ + method_scaled_metrics$scaled_value > 1 + ]) + } else { + 0 + } + + list( + create_qc_entry( + category = "Scaling", + label = paste0("Worst '", metric_name, "' score for '", method, "'"), + value = worst_score, + severity_value = ifelse(worst_score < -1, worst_pct_outside, 0), + condition = "worst_score < -1", + message = paste0( + "Method '", + method, + "' performs much worse than controls for metric '", + metric_name, + "'\n", + " Task: ", + task_name, + "\n", + " Method: ", + method, + "\n", + " Metric: ", + metric_name, + "\n", + " Worst score: ", + worst_score, + "\n", + " Percentage outside range: ", + round(worst_pct_outside * 100, 0), + "%\n" + ) + ), + create_qc_entry( + category = "Scaling", + label = paste0("Best '", metric_name, "' score for '", method, "'"), + value = best_score, + severity_value = ifelse(best_score > 2, best_pct_outside, 0), + condition = "best_score > 2", + message = paste0( + "Method '", + method, + "' performs much better than controls for 
metric '", + metric_name, + "'\n", + " Task: ", + task_name, + "\n", + " Method: ", + method, + "\n", + " Metric: ", + metric_name, + "\n", + " Best score: ", + best_score, + "\n", + " Percentage outside range: ", + round(best_pct_outside * 100, 0), + "%\n" + ) + ) + ) +} + +################################################################################ +# MAIN SCRIPT +################################################################################ + +cat("====== Generate QC ======\n") + +cat("\n>>> Reading input files...\n") +cat("Reading task info from '", par$input_task_info, "'...\n", sep = "") +task_info <- jsonlite::read_json(par$input_task_info) + +cat("Reading dataset info from '", par$input_dataset_info, "'...\n", sep = "") +dataset_info <- jsonlite::read_json(par$input_dataset_info) + +cat("Reading method info from '", par$input_method_info, "'...\n", sep = "") +method_info <- jsonlite::read_json(par$input_method_info) + +cat("Reading metric info from '", par$input_metric_info, "'...\n", sep = "") +metric_info <- jsonlite::read_json(par$input_metric_info) + +cat("Reading results from '", par$input_results, "'...\n", sep = "") +results <- jsonlite::read_json(par$input_results, simplifyVector = TRUE) + +cat("\n>>> Checking expected info fields...\n") + +expected_task_fields <- c("name", "label", "summary", "description") +expected_dataset_fields <- c( + "name", + "label", + "summary", + "description", + "references" +) +expected_method_fields <- c( + "name", + "label", + "commit", + "summary", + "description", + "references" +) +expected_metric_fields <- c( + "name", + "label", + "commit", + "summary", + "description", + "references" +) + +task_name <- task_info$name %||% "unknown" + +info_task <- check_info_fields( + list(task_info), + "task", + expected_task_fields, + task_name +) +info_datasets <- check_info_fields( + dataset_info, + "dataset", + expected_dataset_fields, + task_name +) +info_methods <- check_info_fields( + method_info, + "method", + expected_method_fields, + task_name +) +info_metrics <- check_info_fields( + metric_info, + "metric", + expected_metric_fields, + task_name +) + +cat("\n>>> Checking missing results...\n") +results_long <- results |> + dplyr::select(dataset_name, method_name, metric_names, metric_values) |> + tidyr::unnest_longer(c("metric_names", "metric_values")) |> + dplyr::rename( + metric_name = metric_names, + metric_value = metric_values + ) |> + dplyr::filter(!is.na(metric_value)) + +dataset_names <- purrr::map_chr(dataset_info, "name") +method_names <- purrr::map_chr(method_info, "name") +metric_names <- purrr::map_chr(metric_info, "name") + +n_datasets <- length(dataset_names) +n_methods <- length(method_names) +n_metrics <- length(metric_names) + +n_results_expected <- n_datasets * n_methods * n_metrics +n_results <- nrow(results_long) +pct_results_missing <- 1 - (n_results / n_results_expected) + +results_task <- list( + create_qc_entry( + category = "Raw results", + label = "Task number of results", + value = n_results, + severity_value = pct_results_missing / 0.1, + condition = "length(dataset_info) * length(results) == length(method_info) * length(metric_info)", + message = paste0( + "Number of results should be equal to #datasets × #methods × #metrics \n", + " Task: ", + task_name, + "\n", + " Number of results: ", + n_results, + "\n", + " Number of datasets: ", + n_datasets, + "\n", + " Number of methods: ", + n_methods, + "\n", + " Number of metrics: ", + n_metrics, + "\n", + " Expected number of results: ", + n_results_expected, + 
"\n" + ) + ) +) + +results_datasets <- purrr::map(dataset_names, function(.dataset) { + check_missing_results( + results_long, + .dataset, + "dataset", + n_datasets, + n_methods, + n_metrics, + task_name + ) +}) +results_methods <- purrr::map(method_names, function(.method) { + check_missing_results( + results_long, + .method, + "method", + n_datasets, + n_methods, + n_metrics, + task_name + ) +}) +results_metrics <- purrr::map(metric_names, function(.metric) { + check_missing_results( + results_long, + .metric, + "metric", + n_datasets, + n_methods, + n_metrics, + task_name + ) +}) + +cat("\n>>> Checking failed processes\n") +metric_component_results <- results |> + dplyr::select(dataset_name, method_name, metric_components) |> + tidyr::unnest(metric_components) |> + dplyr::rename(metric_component_name = component_name) + +n_processes <- nrow(results) + nrow(metric_component_results) +n_succeeded <- sum(results$succeeded) + sum(metric_component_results$succeeded) +pct_failed <- 1 - (n_succeeded / n_processes) + +failed_task <- list( + create_qc_entry( + category = "Raw results", + label = "Task number of successful processes", + value = n_succeeded, + severity_value = pct_failed / 0.1, + condition = "sum(results$succeeded) + sum(metric_component_results$succeeded) == nrow(results) + nrow(metric_component_results)", + message = paste0( + "Number of successful processes should be equal to the number of attempted processes\n", + " Task: ", + task_name, + "\n", + " Succeeded processes: ", + n_succeeded, + "\n", + " Attempted processes: ", + n_processes, + "\n", + " Percentage failed: ", + round(pct_failed * 100, 0), + "%\n" + ) + ) +) + +failed_datasets <- purrr::map(dataset_names, function(.dataset) { + check_failed_processes(results, .dataset, "dataset", task_name) +}) +failed_methods <- purrr::map(method_names, function(.method) { + check_failed_processes(results, .method, "method", task_name) +}) + +failed_metrics <- purrr::map( + unique(metric_component_results$metric_component_name), + function(.component) { + check_failed_processes( + metric_component_results, + .component, + "metric_component", + task_name + ) + } +) + +cat("\n>>> Checking control methods...\n") +is_control <- purrr::map_lgl(method_info, \(.method) { + .method$type == "control_method" +}) +control_methods <- method_names[is_control] + +dataset_controls <- results_long |> + dplyr::filter(method_name %in% control_methods) |> + dplyr::select(dataset_name, method_name) |> + dplyr::distinct() |> + dplyr::group_by(dataset_name) |> + dplyr::count(name = "n_controls") |> + dplyr::ungroup() |> + dplyr::mutate(dataset_name = factor(dataset_name, levels = dataset_names)) |> + tidyr::complete(dataset_name, fill = list(n_controls = 0)) + +controls_datasets <- purrr::map( + seq_len(nrow(dataset_controls)), + function(.idx) { + dataset_name <- dataset_controls$dataset_name[.idx] + n_controls <- dataset_controls$n_controls[.idx] + + create_qc_entry( + category = "Raw results", + label = paste0("Dataset `", dataset_name, "' number of control methods"), + value = n_controls, + severity_value = ifelse(n_controls != length(control_methods), 3, 0), + condition = "n_controls != length(control_methods)", + message = paste0( + "Number of successful control methods for a dataset should equal the number of controls\n", + " Task: ", + task_name, + "\n", + " Dataset: ", + dataset_name, + "\n", + " Succeeded control_methods: ", + n_controls, + "\n", + " Total control methods: ", + length(control_methods), + "\n", + " Percentage succeeded: ", + 
round(n_controls / length(control_methods) * 100, 0), + "%\n" + ) + ) + } +) + +metric_controls <- results_long |> + dplyr::filter(method_name %in% control_methods) |> + dplyr::select(method_name, metric_name) |> + dplyr::group_by(metric_name) |> + dplyr::count(name = "n_controls") |> + dplyr::ungroup() |> + dplyr::mutate(metric_name = factor(metric_name, levels = metric_names)) |> + tidyr::complete(metric_name, fill = list(n_controls = 0)) + +n_expected <- length(dataset_names) * length(control_methods) +controls_metrics <- purrr::map(seq_len(nrow(metric_controls)), function(.idx) { + metric_name <- metric_controls$metric_name[.idx] + n_controls <- metric_controls$n_controls[.idx] + + create_qc_entry( + category = "Raw results", + label = paste0("Metric '", metric_name, "' number of control methods"), + value = n_controls, + severity_value = ifelse(n_controls != n_expected, 3, 0), + condition = "n_controls != length(datasets) * length(control_methods)", + message = paste0( + "Number of metric scores for control methods should be equal to #datasets × #control_methods\n", + " Task: ", + task_name, + "\n", + " Metric: ", + metric_name, + "\n", + " Control method scores: ", + n_controls, + "\n", + " Expected control method scores: ", + n_expected, + "\n", + " Percentage succeeded: ", + round(n_controls / n_expected * 100, 0), + "%\n" + ) + ) +}) + +cat("\n>>> Checking metric scaling...\n") +scaling <- purrr::map(metric_names, function(.metric) { + check_metric_scaling(results_long, .metric, control_methods, task_name) +}) |> + purrr::list_flatten() + +cat("\n>>> Collecting QC results...\n") +qc_results <- c( + info_task, + info_datasets, + info_methods, + info_metrics, + results_task, + results_datasets, + results_methods, + results_metrics, + failed_task, + failed_datasets, + failed_methods, + failed_metrics, + controls_datasets, + controls_metrics, + scaling +) + +cat("\n>>> Writing output file...\n") +cat("Writing quality control to '", par$output, "'...\n", sep = "") +jsonlite::write_json( + qc_results, + par$output, + pretty = TRUE, + null = "null", + na = "null", + auto_unbox = TRUE +) + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "quality_control.json"), + "-d", + par$output +) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/generate_qc/script.py b/src/reporting/generate_qc/script.py deleted file mode 100644 index 685cc6436..000000000 --- a/src/reporting/generate_qc/script.py +++ /dev/null @@ -1,302 +0,0 @@ -import json -import numpy as np - -## VIASH START -par = { - "task_info": "resources_test/openproblems/task_results_v3/processed/task_info.json", - "method_info": "resources_test/openproblems/task_results_v3/processed/method_info.json", - "metric_info": "resources_test/openproblems/task_results_v3/processed/metric_info.json", - "dataset_info": "resources_test/openproblems/task_results_v3/processed/dataset_info.json", - "results": "resources_test/openproblems/task_results_v3/processed/results.json", - "output": "output.json" -} -## VIASH END - -EXPECTED_TASK_FIELDS = ["task_id", "task_name", "task_summary", 
"task_description"] -EXPECTED_METHOD_FIELDS = ["task_id", "commit_sha", "method_id", "method_name", "method_summary", "paper_reference", "is_baseline"] -EXPECTED_METRIC_FIELDS = ["task_id", "commit_sha", "metric_id", "metric_name", "metric_summary", "paper_reference", "maximize"] -EXPECTED_DATASET_FIELDS = ["task_id", "dataset_id", "dataset_name", "dataset_summary", "data_reference", "data_url"] - -def dump_json(obj, fp): - """Dump to JSON in a numpy-safe fashion.""" - json.dump( - obj, - fp, - indent=4, - sort_keys=False, - separators=(", ", ": "), - ensure_ascii=False, - ) - -def create_quality_control(task_info, dataset_info, method_info, metric_info, results): - """Quality control to detect anomalies in the results.""" - task_id = task_info["task_id"] - - result_qc = [] - - def add_qc( - category: str, - name: str, - value, - severity_value: float, - code: str, - message: str, - ) -> None: - "Add an entry to the result qc" - if severity_value <= 1: - severity = 0 - elif severity_value <= 2: - severity = 1 - elif severity_value <= 3: - severity = 2 - else: - severity = 3 - result_qc.append({ - "task_id": task_id, - "category": category, - "name": name, - "value": value, - "severity": severity, - "severity_value": severity_value, - "code": code, - "message": message - }) - - def percent_missing(list_of_dicts, field): - are_missing = [] - for item in list_of_dicts: - if field == "paper_reference" and item.get("is_baseline", False): - are_missing.append(0.0) - elif field in item and item[field] is not None: - are_missing.append(0.0) - else: - are_missing.append(1.0) - return np.mean(are_missing) - - # check task_info - for field in EXPECTED_TASK_FIELDS: - pct_missing = percent_missing([task_info], field) - add_qc( - "Task info", - f"Pct '{field}' missing", - pct_missing, - 3.0 if pct_missing > 0 else 0.0, - "percent_missing([task_info], field)", - f"Task metadata field '{field}' should be defined\n" - f" Task id: {task_id}\n" - f" Field: {field}\n" - ) - - # check method_info - for field in EXPECTED_METHOD_FIELDS: - pct_missing = percent_missing(method_info, field) - add_qc( - "Method info", - f"Pct '{field}' missing", - pct_missing, - 3.0 if pct_missing > 0 else 0.0, - "percent_missing(method_info, field)", - f"Method metadata field '{field}' should be defined\n" - f" Task id: {task_id}\n" - f" Field: {field}\n" - ) - - # check metric_info - for field in EXPECTED_METRIC_FIELDS: - pct_missing = percent_missing(metric_info, field) - add_qc( - "Metric info", - f"Pct '{field}' missing", - pct_missing, - 3.0 if pct_missing > 0 else 0.0, - "percent_missing(metric_info, field)", - f"Metric metadata field '{field}' should be defined\n" - f" Task id: {task_id}\n" - f" Field: {field}\n" - ) - - # check dataset_info - for field in EXPECTED_DATASET_FIELDS: - pct_missing = percent_missing(dataset_info, field) - add_qc( - "Dataset info", - f"Pct '{field}' missing", - pct_missing, - 3.0 if pct_missing > 0 else 0.0, - "percent_missing(dataset_info, field)", - f"Dataset metadata field '{field}' should be defined\n" - f" Task id: {task_id}\n" - f" Field: {field}\n" - ) - - # turn results into long format for easier processing - results_long = [ - { - "task_id": task_id, - "method_id": x["method_id"], - "dataset_id": x["dataset_id"], - "metric_id": metric["metric_id"], - "metric_value" : x["metric_values"].get(metric["metric_id"]), - "scaled_score" : x["scaled_scores"].get(metric["metric_id"]), - } - for metric in metric_info - for x in results - ] - - # check percentage missing - pct_missing = 1 - 
len(results_long) / (len(method_info) * len(metric_info) * len(dataset_info)) - add_qc( - "Raw data", - "Number of results", - len(results), - pct_missing / .1, - "len(results) == len(method_info) * len(metric_info) * len(dataset_info)", - f"Number of results should be equal to #methods × #metrics × #datasets.\n" - f" Task id: {task_id}\n" - f" Number of results: {len(results)}\n" - f" Number of methods: {len(method_info)}\n" - f" Number of metrics: {len(metric_info)}\n" - f" Number of datasets: {len(dataset_info)}\n" - ) - - # QC per metric - for metric in metric_info: - metric_id = metric["metric_id"] - values = [ - res - for res in results_long - if res["metric_id"] == metric_id - and res["metric_value"] is not None - and np.isreal(res["metric_value"]) - ] - pct_missing = 1 - len(values) / len(dataset_info) / len(method_info) - - add_qc( - "Raw results", - f"Metric '{metric_id}' %missing", - pct_missing, - pct_missing / .1, - "pct_missing <= .1", - f"Percentage of missing results should be less than 10%.\n" - f" Task id: {task_id}\n" - f" Metric id: {metric_id}\n" - f" Percentage missing: {pct_missing*100:.0f}%\n" - ) - - # QC per method - for method in method_info: - method_id = method["method_id"] - values = [ - res - for res in results_long - if res["method_id"] == method_id - and res["metric_value"] is not None - and np.isreal(res["metric_value"]) - ] - pct_missing = 1 - len(values) / len(dataset_info) / len(metric_info) - - add_qc( - "Raw results", - f"Method '{method_id}' %missing", - pct_missing, - pct_missing / .1, - "pct_missing <= .1", - f"Percentage of missing results should be less than 10%.\n" - f" Task id: {task_id}\n" - f" method id: {method_id}\n" - f" Percentage missing: {pct_missing*100:.0f}%\n" - ) - - # QC per dataset - for dataset in dataset_info: - dataset_id = dataset["dataset_id"] - values = [ - res - for res in results_long - if res["dataset_id"] == dataset_id - and res["metric_value"] is not None - and np.isreal(res["metric_value"]) - ] - pct_missing = 1 - len(values) / len(metric_info) / len(method_info) - - add_qc( - "Raw results", - f"Dataset '{dataset_id}' %missing", - pct_missing, - pct_missing / .1, - "pct_missing <= .1", - f"Percentage of missing results should be less than 10%.\n" - f" Task id: {task_id}\n" - f" dataset id: {dataset_id}\n" - f" Percentage missing: {pct_missing*100:.0f}%\n" - ) - - - # QC per metric and method - for metric in metric_info: - for method in method_info: - metric_id = metric["metric_id"] - method_id = method["method_id"] - scores = [ - res["scaled_score"] - for res in results_long - if res["metric_id"] == metric_id - and res["method_id"] == method_id - and res["scaled_score"] is not None - and np.isreal(res["scaled_score"]) - ] - - if len(scores) >= 1: - worst_score = np.min(scores).item() - best_score = np.max(scores).item() - - add_qc( - "Scaling", - f"Worst score {method_id} {metric_id}", - worst_score, - worst_score / -1, - "worst_score >= -1", - f"Method {method_id} performs much worse than baselines.\n" - f" Task id: {task_id}\n" - f" Method id: {method_id}\n" - f" Metric id: {metric_id}\n" - f" Worst score: {worst_score}%\n" - ) - - add_qc( - "Scaling", - f"Best score {method_id} {metric_id}", - best_score, - best_score / 2, - "best_score <= 2", - f"Method {method_id} performs a lot better than baselines.\n" - f" Task id: {task_id}\n" - f" Method id: {method_id}\n" - f" Metric id: {metric_id}\n" - f" Best score: {best_score}%\n" - ) - - return result_qc - -def main(par): - # read data from files - with 
open(par["task_info"], "r", encoding="utf8") as file: - task_info = json.load(file) - with open(par["method_info"], "r", encoding="utf8") as file: - method_info = json.load(file) - with open(par["metric_info"], "r", encoding="utf8") as file: - metric_info = json.load(file) - with open(par["dataset_info"], "r", encoding="utf8") as file: - dataset_info = json.load(file) - with open(par["results"], "r", encoding="utf8") as file: - results = json.load(file) - - # create info objects - quality_control = create_quality_control(task_info, dataset_info, method_info, metric_info, results) - - # write data to files - with open(par["output"], "w", encoding="utf8") as file: - dump_json(quality_control, file) - -if __name__ == "__main__": - main(par) diff --git a/src/reporting/get_dataset_info/config.vsh.yaml b/src/reporting/get_dataset_info/config.vsh.yaml index ff43cb478..9cfbbe640 100644 --- a/src/reporting/get_dataset_info/config.vsh.yaml +++ b/src/reporting/get_dataset_info/config.vsh.yaml @@ -1,35 +1,61 @@ name: get_dataset_info namespace: reporting -description: Extract dataset info and convert to expected format for website results -arguments: - - name: --input - type: file - description: A yaml file - required: true - example: resources_test/openproblems/task_results_v3/raw/dataset_uns.yaml - - name: --output - type: file - direction: output - default: output.json - description: Output json - info: - format: - type: json - # TODO: add schema +description: Convert dataset uns YAML to schema-compliant JSON + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: A YAML file containing dataset uns + required: true + example: resources_test/openproblems/task_results_v4/raw/dataset_uns.yaml + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + default: dataset_info.json + description: Output JSON file matching dataset info schema + info: + format: + type: json + schema: /common/schemas/results_v4/dataset_info.json + example: resources_test/openproblems/task_results_v4/processed/dataset_info.json + resources: - type: r_script path: script.R + - path: /src/reporting/shared/functions.R + dest: functions.R + - path: /common/schemas + dest: schemas + - path: /src/reporting/shared/bibliography.bib + dest: bibliography.bib + test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/openproblems/task_results_v3 - dest: resources_test/openproblems/task_results_v3 + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + engines: - type: docker image: openproblems/base_r:1 setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli - type: r - cran: [ purrr, yaml, rlang, processx ] + cran: + - bibtex + - purrr + - stringr + runners: - type: executable - type: nextflow diff --git a/src/reporting/get_dataset_info/script.R b/src/reporting/get_dataset_info/script.R index 797fdb1ad..19e3033da 100644 --- a/src/reporting/get_dataset_info/script.R +++ b/src/reporting/get_dataset_info/script.R @@ -1,53 +1,112 @@ -requireNamespace("jsonlite", quietly = TRUE) -requireNamespace("yaml", quietly = TRUE) -library(purrr, warn.conflicts = FALSE) -library(rlang, warn.conflicts = FALSE) - ## VIASH START par <- list( - input = "resources_test/openproblems/task_results_v3/raw/dataset_uns.yaml", - output = "resources_test/openproblems/task_results_v3/processed/dataset_info.json" + input = 
"resources_test/openproblems/task_results_v4/raw/dataset_uns.yaml", + output = "resources_test/openproblems/task_results_v4/processed/dataset_info.json" ) ## VIASH END -datasets <- yaml::yaml.load_file(par$input) +source(file.path(meta$resources_dir, "functions.R")) -# transform into format expected by website -outputs <- map(datasets, function(dataset) { - # ↑ the 'dataset' object could be used as the new format +`%||%` <- rlang::`%||%` - # TODO: it'd be nice if the s3 path was also included in the dataset info +cat("====== Get dataset info ======\n") - # construct v1 format - out <- list( - "dataset_id" = dataset$dataset_id, - "dataset_name" = dataset$dataset_name, - "dataset_summary" = dataset$dataset_summary, - "dataset_description" = dataset$dataset_description %||% NA_character_, - "data_reference" = dataset$dataset_reference %||% NA_character_, - "data_url" = dataset$dataset_url %||% NA_character_, - "date_created" = dataset$date_created %||% NA_character_, - "file_size" = dataset$file_size %||% NA_character_ - ) +cat("\n>>> Reading input files...\n") +cat("Reading dataset uns from '", par$input, "'...\n", sep = "") +dataset_uns <- yaml::yaml.load_file( + par$input, + # Read file sizes as floats to avoid issues with big integers + handlers = list(int = \(x) { + as.numeric(x) + }) +) + +cat( + "\n>>> Processing ", + length(dataset_uns), + " datasets...\n", + sep = "" +) +bibliography <- read_bibliography( + file.path(meta$resources_dir, "bibliography.bib") +) +dataset_info_json <- purrr::map(dataset_uns, function(.dataset) { + cat("Processing dataset uns '", .dataset$dataset_id, "'\n", sep = "") + + authors <- get_authors_list(.dataset$authors) - if (!is.null(dataset[["common_dataset_id"]])) { - out[["common_dataset_id"]] <- dataset[["common_dataset_id"]] + if ("dataset_reference" %in% names(.dataset)) { + reference_name <- "dataset_reference" + } else if ("data_reference" %in% names(.dataset)) { + reference_name <- "data_reference" + } else { + stop("No reference found in dataset uns for '", .dataset$dataset_id, "'") } - # show warning when certain data is missing and return null? 
- for (n in names(out)) { - if (is.null(out[[n]])) { - out_as_str <- jsonlite::toJSON(out, auto_unbox = TRUE, pretty = TRUE) - stop("missing value for value '", n, "' in ", out_as_str) - } + references <- get_references_list(.dataset[[reference_name]], bibliography) + + if ("dataset_url" %in% names(.dataset)) { + url_name <- "dataset_url" + } else if ("data_url" %in% names(.dataset)) { + url_name <- "data_url" + } else { + stop("No URL found in dataset uns for '", .dataset$dataset_id, "'") } - out + list( + name = jsonlite::unbox(.dataset$dataset_id), + label = jsonlite::unbox(.dataset$dataset_name), + commit = jsonlite::unbox(.dataset$dataset_commit %||% "missing-sha"), + summary = .dataset$dataset_summary |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + description = .dataset$dataset_description |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + source_url = jsonlite::unbox(.dataset[[url_name]]), + common_dataset_names = .dataset$common_dataset_id, + modalities = jsonlite::unbox(.dataset$dataset_modality), + organisms = .dataset$dataset_organism, + authors = authors, + references = references, + date_created = jsonlite::unbox(.dataset$date_created), + file_size_mb = jsonlite::unbox(.dataset$file_size / 1048576) + ) }) +cat("\n>>> Writing output files...\n") +cat("Writing dataset info to '", par$output, "'...\n", sep = "") jsonlite::write_json( - outputs, + dataset_info_json, par$output, - auto_unbox = TRUE, - pretty = TRUE + pretty = TRUE, + null = "null" +) + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "dataset_info.json"), + "-r", + file.path(results_schemas, "core.json"), + "-d", + par$output ) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/get_method_info/config.vsh.yaml b/src/reporting/get_method_info/config.vsh.yaml index 23528273b..884418b7c 100644 --- a/src/reporting/get_method_info/config.vsh.yaml +++ b/src/reporting/get_method_info/config.vsh.yaml @@ -1,35 +1,61 @@ name: get_method_info namespace: reporting -description: Extract method info -arguments: - - name: --input - type: file - description: A yaml file - required: true - example: resources_test/openproblems/task_results_v3/raw/method_configs.yaml - - name: --output - type: file - direction: output - default: output.json - description: Output json - info: - format: - type: json - # TODO: add schema +description: Convert method configs YAML to schema-compliant JSON + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: A YAML file containing method configs + required: true + example: resources_test/openproblems/task_results_v4/raw/method_configs.yaml + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + default: method_info.json + description: Output JSON file matching method info schema + info: + format: + type: json + schema: /common/schemas/results_v4/method_info.json + example: resources_test/openproblems/task_results_v4/processed/method_info.json + resources: - type: 
r_script path: script.R + - path: /src/reporting/shared/functions.R + dest: functions.R + - path: /common/schemas + dest: schemas + - path: /src/reporting/shared/bibliography.bib + dest: bibliography.bib + test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/openproblems/task_results_v3 - dest: resources_test/openproblems/task_results_v3 + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + engines: - type: docker image: openproblems/base_r:1 setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli - type: r - cran: [ purrr, yaml, rlang, processx ] + cran: + - bibtex + - purrr + - stringr + runners: - type: executable - type: nextflow diff --git a/src/reporting/get_method_info/script.R b/src/reporting/get_method_info/script.R index 0623d89fd..7e0eed9cf 100644 --- a/src/reporting/get_method_info/script.R +++ b/src/reporting/get_method_info/script.R @@ -1,118 +1,175 @@ -requireNamespace("jsonlite", quietly = TRUE) -requireNamespace("yaml", quietly = TRUE) -library(purrr, warn.conflicts = FALSE) -library(rlang, warn.conflicts = FALSE) - ## VIASH START par <- list( - input = "method_configs.yaml", - output = "resources_test/openproblems/task_results_v3/processed/method_info.json" + input = "resources_test/openproblems/task_results_v4/raw/method_configs.yaml", + output = "resources_test/openproblems/task_results_v4/processed/method_info.json" ) ## VIASH END -configs <- yaml::yaml.load_file(par$input) +source(file.path(meta$resources_dir, "functions.R")) -outputs <- map(configs, function(config) { - if (length(config$functionality$status) > 0 && config$functionality$status == "disabled") { - return(NULL) - } +################################################################################ +# FUNCTIONS +################################################################################ - # prep for viash 0.9.0 - build_info <- config$build_info %||% config$info - if ("functionality" %in% names(config)) { - config[names(config$functionality)] <- config$functionality - config[["functionality"]] <- NULL - } +get_implementation_url <- function(config) { + paste0( + config$build_info$git_remote, + "/blob/", + config$build_info$git_commit, + "/", + config$build_info$config |> + stringr::str_replace(".*/src/", "src/") |> + stringr::str_remove("/config.vsh.yaml") + ) +} - info <- config$info - - # add extra info - info$comp_path <- gsub(".*/src/", "src/", build_info$config) %>% gsub("/config.vsh.yaml", "", .) - info$task_id <- gsub("/.*", "", config$namespace) - info$id <- config$name - info$namespace <- config$namespace - info$label <- config$label %||% info$label - info$summary <- config$summary %||% info$summary - info$description <- config$description %||% info$description - info$commit_sha <- build_info$git_commit %||% "missing-sha" - info$code_version <- config$version - info$code_url <- config$links$repository - info$documentation_url <- config$links$documentation - # Check if the method has a docker container to create an image url. If it does not have a docker it will be a nextflow component consisting of different components that will have a docker image. +get_container_image <- function(config) { + # Check if the method has a docker container to create an image url. + # If it does not have a docker it will be a nextflow component consisting of + # different components that will have a docker image.
engines <- config$engines - has_docker <- any(map_lgl(engines, ~ .x$type == "docker")) + has_docker <- any(purrr::map_lgl(engines, ~ .x$type == "docker")) if (has_docker) { - info$image <- paste0( + paste0( "https://", - config$links$docker_registry, "/", - config$package_config$organization, "/", - config$package_config$name, "/", - gsub("src/", "", info$comp_path), + config$links$docker_registry, + "/", + config$package_config$organization, + "/", + config$package_config$name, + "/", + config$build_info$config |> + stringr::str_remove(".*/src/") |> + stringr::str_remove("/config.vsh.yaml"), ":", - info$code_version + config$version ) - } else { - info$image <- paste0( + } else { + paste0( "https://github.com/orgs/openproblems-bio/packages?repo_name=", config$package_config$name, "&q=", - gsub("src/", "", info$comp_path) + config$build_info$config |> + stringr::str_remove(".*/src/") |> + stringr::str_remove("/config.vsh.yaml") ) } - info$implementation_url <- paste0( - build_info$git_remote, "/blob/", - build_info$git_commit, "/", - info$comp_path +} + +get_additional_info <- function(config) { + # Fields that are stored elsewhere and we don't want to save here + exclude <- c( + "type", + "type_info", + "label", + "summary", + "description", + "documentation_url", + "authors" ) - info$type_info <- NULL - - # Flatten references - if (!is.null(config$references) && config$references != "") { - info <- imap(config$references, function(value, key) { - info[[paste0("references_", key)]] <- value - return(info) - })[[1]] + + config$info[setdiff(names(config$info), exclude)] |> + purrr::map(recurse_unbox) +} + +recurse_unbox <- function(x) { + if (is.list(x)) { + purrr::map(x, recurse_unbox) + } else if (length(x) == 1) { + jsonlite::unbox(x) + } else { + x } - info$references <- NULL - - print(info) - - - # ↑ this could be used as the new format - - # construct v1 format - out <- list( - task_id = info$task_id, - method_id = info$id, - method_name = info$label, - method_summary = info$summary, - method_description = info$description, - is_baseline = grepl("control", info$type), - references_doi = info$references_doi %||% NA_character_, - references_bibtex = info$references_bibtex %||% NA_character_, - code_url = info$code_url %||% NA_character_, - documentation_url = info$documentation_url %||% NA_character_, - image = info$image %||% NA_character_, - implementation_url = info$implementation_url %||% NA_character_, - code_version = info$code_version %||% NA_character_, - commit_sha = info$commit_sha - ) +} + +################################################################################ +# MAIN SCRIPT +################################################################################ + +cat("====== Get method info ======\n") - # show warning when certain data is missing and return null? 
- for (n in names(out)) { - if (is.null(out[[n]])) { - out_as_str <- jsonlite::toJSON(out, auto_unbox = TRUE, pretty = TRUE) - stop("missing value for value '", n, "' in ", out_as_str) - } +`%||%` <- rlang::`%||%` + +cat("\n>>> Reading input files...\n") +cat("Reading method info from '", par$input, "'...\n", sep = "") +method_configs <- yaml::yaml.load_file(par$input) + +cat( + "\n>>> Processing ", + length(method_configs), + " method configs...\n", + sep = "" +) +bibliography <- read_bibliography( + file.path(meta$resources_dir, "bibliography.bib") +) +method_info_json <- purrr::map(method_configs, function(.config) { + if (.config$status == "disabled") { + cat("Skipping disabled method '", .config$name, "'\n", sep = "") + return(NULL) + } else { + cat("Processing method '", .config$name, "'\n", sep = "") } - # return output - out + list( + name = jsonlite::unbox(.config$name), + label = jsonlite::unbox(.config$label %||% .config$info$label), + commit = jsonlite::unbox(.config$build_info$git_commit %||% "missing-sha"), + summary = .config$summary %||% + .config$info$summary |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + description = .config$description %||% + .config$info$description |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + type = jsonlite::unbox(.config$info$type), + link_code = jsonlite::unbox(.config$links$repository), + link_documentation = jsonlite::unbox( + .config$links$documentation %||% .config$info$documentation_url + ), + link_implementation = jsonlite::unbox(get_implementation_url(.config)), + link_container_image = jsonlite::unbox(get_container_image(.config)), + authors = get_authors_list(.config$authors), + references = get_references_list(.config$references, bibliography), + additional_info = get_additional_info(.config), + version = jsonlite::unbox(.config$version) + ) }) +cat("\n>>> Writing output files...\n") +cat("Writing task info to '", par$output, "'...\n", sep = "") jsonlite::write_json( - outputs, + method_info_json, par$output, - auto_unbox = TRUE, - pretty = TRUE -) \ No newline at end of file + pretty = TRUE, + null = "null" +) + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "method_info.json"), + "-r", + file.path(results_schemas, "core.json"), + "-d", + par$output +) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/get_metric_info/config.vsh.yaml b/src/reporting/get_metric_info/config.vsh.yaml index 597f4a420..fc5f05949 100644 --- a/src/reporting/get_metric_info/config.vsh.yaml +++ b/src/reporting/get_metric_info/config.vsh.yaml @@ -1,35 +1,61 @@ name: get_metric_info namespace: reporting -description: Extract metric info -arguments: - - name: --input - type: file - description: A yaml file - required: true - example: resources_test/openproblems/task_results_v3/raw/metric_configs.yaml - - name: --output - type: file - direction: output - default: output.json - description: Output json - info: - format: - type: json - # TODO: add schema +description: Convert metric configs YAML 
to schema-compliant JSON + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: A YAML file containing metric configs + required: true + example: resources_test/openproblems/task_results_v4/raw/metric_configs.yaml + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + default: metric_info.json + description: Output JSON file matching metric info schema + info: + format: + type: json + schema: /common/schemas/results_v4/metric_info.json + example: resources_test/openproblems/task_results_v4/processed/metric_info.json + resources: - type: r_script path: script.R + - path: /src/reporting/shared/functions.R + dest: functions.R + - path: /common/schemas + dest: schemas + - path: /src/reporting/shared/bibliography.bib + dest: bibliography.bib + test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/openproblems/task_results_v3 - dest: resources_test/openproblems/task_results_v3 + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + engines: - type: docker image: openproblems/base_r:1 setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli - type: r - cran: [ purrr, yaml, rlang, processx ] + cran: + - bibtex + - purrr + - stringr + runners: - type: executable - type: nextflow diff --git a/src/reporting/get_metric_info/script.R b/src/reporting/get_metric_info/script.R index 0f046bd90..778c84b99 100644 --- a/src/reporting/get_metric_info/script.R +++ b/src/reporting/get_metric_info/script.R @@ -1,102 +1,187 @@ -requireNamespace("jsonlite", quietly = TRUE) -requireNamespace("yaml", quietly = TRUE) -library(purrr, warn.conflicts = FALSE) -library(rlang, warn.conflicts = FALSE) - ## VIASH START par <- list( - input = "resources_test/openproblems/task_results_v3/raw/metric_configs.yaml", - output = "resources_test/openproblems/task_results_v3/processed/metric_info.json" + input = "resources_test/openproblems/task_results_v4/raw/metric_configs.yaml", + output = "resources_test/openproblems/task_results_v4/processed/metric_info.json" ) ## VIASH END -configs <- yaml::yaml.load_file(par$input) +source(file.path(meta$resources_dir, "functions.R")) -outputs <- map(configs, function(config) { - if (length(config$functionality$status) > 0 && config$functionality$status == "disabled") { - return(NULL) +################################################################################ +# FUNCTIONS +################################################################################ + +get_implementation_url <- function(config) { + paste0( + config$build_info$git_remote, + "/blob/", + config$build_info$git_commit, + "/", + config$build_info$config |> + stringr::str_replace(".*/src/", "src/") |> + stringr::str_remove("/config.vsh.yaml") + ) +} + +get_container_image <- function(config) { + # Check if the method has a docker container to create an image url. + # If it does not have a docker it will be a nextflow component consisting of + # different components that will have a docker image. 
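For orientation, the metric_configs.yaml consumed here is a list of built component configs, each carrying one or more metric definitions under info$metrics. A quick way to inspect it (path taken from the example above, assuming the test resources have been synced locally):

metric_configs <- yaml::yaml.load_file(
  "resources_test/openproblems/task_results_v4/raw/metric_configs.yaml"
)
length(metric_configs)                                       # number of metric components
purrr::map_chr(metric_configs, "name")                       # component names
purrr::map_int(metric_configs, ~ length(.x$info$metrics))    # metrics per component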
+ engines <- config$engines + has_docker <- any(purrr::map_lgl(engines, ~ .x$type == "docker")) + if (has_docker) { + paste0( + "https://", + config$links$docker_registry, + "/", + config$package_config$organization, + "/", + config$package_config$name, + "/", + config$build_info$config |> + stringr::str_remove(".*/src/") |> + stringr::str_remove("/config.vsh.yaml"), + ":", + config$version + ) + } else { + paste0( + "https://github.com/orgs/openproblems-bio/packages?repo_name=", + config$package_config$name, + "&q=", + config$build_info$config |> + stringr::str_remove(".*/src/") |> + stringr::str_remove("/config.vsh.yaml") + ) } +} + +get_additional_info <- function(info, exclude, name_prefix = "") { + additional <- info[setdiff(names(info), exclude)] |> + purrr::map(recurse_unbox) - # prep for viash 0.9.0 - build_info <- config$build_info %||% config$info - if ("functionality" %in% names(config)) { - config[names(config$functionality)] <- config$functionality - config[["functionality"]] <- NULL + rlang::set_names(additional, paste0(name_prefix, names(additional))) +} + +recurse_unbox <- function(x) { + if (is.list(x)) { + purrr::map(x, recurse_unbox) + } else if (length(x) == 1) { + jsonlite::unbox(x) + } else { + x } +} - map( - config$info$metrics, - function(info) { - # add extra info - info$comp_path <- gsub(".*/src/", "src/", build_info$config) %>% gsub("/config.vsh.yaml", "", .) - info$task_id <- gsub("/.*", "", config$namespace) - info$id <- info$name - info$name <- NULL - info$component_name <- config$name - info$namespace <- config$namespace - info$commit_sha <- build_info$git_commit %||% "missing-sha" - info$code_version <- config$version %||% "missing-version" - info$image_url <- paste0( - "https://", - config$links$docker_registry, "/", - config$package_config$organization, "/", - config$package_config$name, "/", - gsub("src/", "", info$comp_path), - ":", - info$code_version - ) - info$implementation_url <- paste0( - build_info$git_remote, "/blob/", - build_info$git_commit, "/", - info$comp_path - ) - # Flatten references - if (!is.null(info$references) && info$references != "") { - info <- imap(info$references, function(value, key) { - info[[paste0("references_", key)]] <- value - return(info) - })[[1]] - } - info$references <- NULL - - # ↑ this could be used as the new format - - # construct v1 format - out <- list( - task_id = info$task_id, - component_name = info$component_name, - metric_id = info$id, - metric_name = info$label, - metric_summary = info$summary, - metric_description = info$description, - references_doi = info$references_doi %||% NA_character_, - references_bibtex = info$references_bibtex %||% NA_character_, - implementation_url = info$implementation_url %||% NA_character_, - image = info$image_url %||% NA_character_, - code_version = info$code_version %||% NA_character_, - commit_sha = info$commit_sha, - maximize = info$maximize - ) - - # show warning when certain data is missing and return null? 
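The get_container_image() helper above yields either a registry image URL (when the component has a Docker engine) or a GitHub Packages search URL otherwise. A sketch of the Docker branch, again with made-up config values:

# Illustrative values only; the real fields come from the metric/method config.
config <- list(
  version = "1.0.0",
  links = list(docker_registry = "ghcr.io"),
  package_config = list(
    organization = "openproblems-bio",
    name = "task_batch_integration"
  ),
  build_info = list(config = "/tmp/viash/src/metrics/asw/config.vsh.yaml")
)
paste0(
  "https://",
  config$links$docker_registry, "/",
  config$package_config$organization, "/",
  config$package_config$name, "/",
  config$build_info$config |>
    stringr::str_remove(".*/src/") |>
    stringr::str_remove("/config.vsh.yaml"),
  ":",
  config$version
)
# -> "https://ghcr.io/openproblems-bio/task_batch_integration/metrics/asw:1.0.0"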
- for (n in names(out)) { - if (is.null(out[[n]])) { - out_as_str <- jsonlite::toJSON(out, auto_unbox = TRUE, pretty = TRUE) - stop("missing value for value '", n, "' in ", out_as_str) - } - } - - # return output - out - } - ) -}) +################################################################################ +# MAIN SCRIPT +################################################################################ + +cat("====== Get metric info ======\n") + +`%||%` <- rlang::`%||%` + +cat("\n>>> Reading input files...\n") +cat("Reading metric info from '", par$input, "'...\n", sep = "") +metric_configs <- yaml::yaml.load_file(par$input) -outputs <- unlist(outputs, recursive = FALSE) +cat( + "\n>>> Processing ", + length(metric_configs), + " metric configs...\n", + sep = "" +) +bibliography <- read_bibliography( + file.path(meta$resources_dir, "bibliography.bib") +) +metric_info_json <- purrr::map(metric_configs, function(.config) { + if (.config$status == "disabled") { + cat("Skipping disabled metric component '", .config$name, "'\n", sep = "") + return(NULL) + } else { + cat("Processing metric component '", .config$name, "'\n", sep = "") + } + + purrr::map(.config$info$metrics, function(.metric) { + list( + name = jsonlite::unbox(.metric$name), + label = jsonlite::unbox(.metric$label), + commit = jsonlite::unbox( + .config$build_info$git_commit %||% "missing-sha" + ), + summary = .metric$summary |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + description = .metric$description |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + maximize = jsonlite::unbox(.metric$maximize), + link_implementation = jsonlite::unbox(get_implementation_url(.config)), + link_container_image = jsonlite::unbox(get_container_image(.config)), + component_name = jsonlite::unbox(.config$name), + authors = get_authors_list(.metric$authors), + references = get_references_list(.metric$references, bibliography), + additional_info = c( + get_additional_info( + .config$info, + exclude = c("metrics", "type", "type_info"), + name_prefix = "component_" + ), + get_additional_info( + .metric, + exclude = c( + "name", + "label", + "summary", + "description", + "maximize", + "min", + "max", + "links", + "authors", + "references" + ) + ) + ), + version = jsonlite::unbox(.config$version) + ) + }) +}) |> + purrr::list_flatten() +cat("\n>>> Writing output files...\n") +cat("Writing task info to '", par$output, "'...\n", sep = "") jsonlite::write_json( - outputs, + metric_info_json, par$output, - auto_unbox = TRUE, - pretty = TRUE -) \ No newline at end of file + pretty = TRUE, + null = "null" +) + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "metric_info.json"), + "-r", + file.path(results_schemas, "core.json"), + "-d", + par$output +) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/get_results/config.vsh.yaml b/src/reporting/get_results/config.vsh.yaml index 3b5f7c6eb..20477062c 100644 --- a/src/reporting/get_results/config.vsh.yaml +++ 
b/src/reporting/get_results/config.vsh.yaml @@ -1,63 +1,92 @@ name: get_results namespace: reporting -description: Extract execution info +description: Create a schema-compliant results JSON + argument_groups: - name: Inputs arguments: - name: --input_scores type: file - description: Scores file - example: resources_test/openproblems/task_results_v3/raw/score_uns.yaml - - name: --input_execution + description: Scores YAML file + required: true + example: resources_test/openproblems/task_results_v4/raw/score_uns.yaml + - name: --input_trace type: file - description: Nextflow log file - example: resources_test/openproblems/task_results_v3/raw/trace.txt + description: Nextflow trace file + required: true + example: resources_test/openproblems/task_results_v4/raw/trace.txt - name: --input_dataset_info type: file - description: Method info file - example: resources_test/openproblems/task_results_v3/processed/dataset_info.json + description: Dataset info JSON file + info: + format: + type: json + schema: /common/schemas/results_v4/dataset_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/dataset_info.json - name: --input_method_info type: file - description: Method info file - example: resources_test/openproblems/task_results_v3/processed/method_info.json + description: Method info JSON file + info: + format: + type: json + schema: /common/schemas/results_v4/method_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/method_info.json - name: --input_metric_info type: file - description: Metric info file - example: resources_test/openproblems/task_results_v3/processed/metric_info.json - - name: Outputs - arguments: - - name: --output_results - type: file - direction: output - description: Output json - default: results.json + description: Metric info JSON file info: format: type: json - # TODO: add schema - - name: --output_metric_execution_info + schema: /common/schemas/results_v4/metric_info.json + required: true + example: resources_test/openproblems/task_results_v4/processed/metric_info.json + + - name: Outputs + arguments: + - name: --output type: file direction: output - description: Output metric execution info - default: metric_execution_info.json + description: Output JSON file matching results schema + default: results.json info: format: type: json - # TODO: add schema + schema: /common/schemas/results_v4/results.json + example: resources_test/openproblems/task_results_v4/processed/results.json + resources: - type: r_script path: script.R + - path: /common/schemas + dest: schemas + test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/openproblems/task_results_v3 - dest: resources_test/openproblems/task_results_v3 + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + engines: - type: docker image: openproblems/base_r:1 setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli - type: r - cran: [ purrr, yaml, rlang, dplyr, tidyr, readr, lubridate, dynutils, processx ] + cran: + - dplyr + - lubridate + - purrr + - readr + - stringr + - tidyr + runners: - type: executable - type: nextflow diff --git a/src/reporting/get_results/script.R b/src/reporting/get_results/script.R index 6b4555665..82c2b1691 100644 --- a/src/reporting/get_results/script.R +++ b/src/reporting/get_results/script.R @@ -1,302 +1,342 @@ -requireNamespace("jsonlite", quietly = TRUE) 
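The rewritten script below turns each score_uns.yaml entry into rows of a tidy scores tibble (one row per metric, with dataset and method recycled). A hedged sketch with a single made-up entry, following the fields the script uses:

entry <- list(
  dataset_id = "cellxgene_census/immune_cell_atlas",
  method_id = "scvi",
  metric_ids = c("asw_batch", "asw_label"),
  metric_values = c(0.83, 0.61)
)
entry[c("dataset_id", "method_id", "metric_ids", "metric_values")] |>
  tibble::as_tibble() |>
  dplyr::rename(
    dataset_name = dataset_id,
    method_name = method_id,
    metric_name = metric_ids,
    metric_value = metric_values
  )
# -> two rows, one per metric, with the dataset/method columns recycled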
-requireNamespace("yaml", quietly = TRUE) -requireNamespace("dynutils", quietly = TRUE) -requireNamespace("readr", quietly = TRUE) -requireNamespace("lubridate", quietly = TRUE) -library(dplyr, warn.conflicts = FALSE) -library(tidyr, warn.conflicts = FALSE) -library(purrr, warn.conflicts = FALSE) -library(rlang, warn.conflicts = FALSE) - ## VIASH START -# raw_dir <- "resources_test/openproblems/task_results_v3/raw" -# processed_dir <- "resources_test/openproblems/task_results_v3/processed" -# raw_dir <- "/home/rcannood/workspace/openproblems-bio/task_perturbation_prediction/resources/results/run_2024-10-31_06-14-14" -# processed_dir <- "/home/rcannood/workspace/openproblems-bio/website/results/perturbation_prediction/data" -raw_dir <- "/home/rcannood/workspace/openproblems-bio/task_batch_integration/resources/results/run_2024-11-20_12-47-03" -processed_dir <- "/home/rcannood/workspace/openproblems-bio/website/results/batch_integration/data" +raw_dir <- "resources_test/openproblems/task_results_v4/raw" +processed_dir <- "resources_test/openproblems/task_results_v4/processed" par <- list( - # inputs + # Inputs input_scores = paste0(raw_dir, "/score_uns.yaml"), - input_execution = paste0(raw_dir, "/trace.txt"), + input_trace = paste0(raw_dir, "/trace.txt"), input_dataset_info = paste0(processed_dir, "/dataset_info.json"), input_method_info = paste0(processed_dir, "/method_info.json"), - input_method_configs = paste0(raw_dir, "/method_configs.yaml"), input_metric_info = paste0(processed_dir, "/metric_info.json"), - # outputs - output_results = paste0(processed_dir, "/results.json"), - output_metric_execution_info = paste0(processed_dir, "/metric_execution_info.json") + # Outputs + output = paste0(processed_dir, "/results.json") ) ## VIASH END -# --- helper functions --------------------------------------------------------- -cat("Loading helper functions\n") -parse_exit <- function(x) { - if (is.na(x) || x == "-") { - NA_integer_ - } else { - as.integer(x) - } -} -parse_duration <- function(x) { - if (is.na(x) || x == "-") { - NA_real_ - } else { - as.numeric(lubridate::duration(toupper(x))) - } +################################################################################ +# FUNCTIONS +################################################################################ + +parse_exit_code <- function(exit_codes) { + exit_codes <- as.integer(exit_codes) + # Set missing exit codes to -1 for "Unknown error" + exit_codes[is.na(exit_codes)] <- -1L + exit_codes } -parse_cpu <- function(x) { - if (is.na(x) || x == "-") { - NA_real_ - } else { - as.numeric(gsub(" *%", "", x)) - } + +parse_duration <- function(durations) { + durations |> + toupper() |> + lubridate::duration() |> + as.numeric() } -parse_size <- function(x) { - out <- - if (is.na(x) || x == "-") { - NA_integer_ - } else if (grepl("TB", x)) { - as.numeric(gsub(" *TB", "", x)) * 1024 * 1024 - } else if (grepl("GB", x)) { - as.numeric(gsub(" *GB", "", x)) * 1024 - } else if (grepl("MB", x)) { - as.numeric(gsub(" *MB", "", x)) - } else if (grepl("KB", x)) { - as.numeric(gsub(" *KB", "", x)) / 1024 - } else if (grepl("B", x)) { - as.numeric(gsub(" *B", "", x)) / 1024 / 1024 - } else { - NA_integer_ - } - as.integer(ceiling(out)) + +parse_cpu_pct <- function(cpu_pcts) { + cpu_pcts |> + stringr::str_remove(" *%") |> + as.numeric() } -# --- read input files --------------------------------------------------------- -cat("Reading input files\n") -# read scores -raw_scores <- - yaml::yaml.load_file(par$input_scores) %>% - map_df(function(x) { - 
tryCatch({ - as_tibble(as.data.frame( - x[c("dataset_id", "method_id", "metric_ids", "metric_values")] - )) - }, error = function(e) { - message("Encountered error while reading scores.\n Error: ", e$message, "\n Data: ", paste(paste0(names(x), "=", x), collapse = ", ")) - NULL - }) - }) +parse_memory <- function(memories) { + values <- memories |> + stringr::str_remove("[[:blank:][:alpha:]]+") |> + as.numeric() -# read metric info -dataset_info <- jsonlite::read_json(par$input_dataset_info, simplifyVector = TRUE) -method_info <- jsonlite::read_json(par$input_method_info, simplifyVector = TRUE) -metric_info <- jsonlite::read_json(par$input_metric_info, simplifyVector = TRUE) + units <- stringr::str_remove(memories, "[[:digit:]\\.[:blank:]]+") -# --- process scores and execution info ---------------------------------------- -cat("Processing scores and execution info\n") -scale_scores <- function(values, is_control, maximize) { - control_values <- values[is_control & !is.na(values)] - if (length(control_values) < 2) { - return(NA_real_) - } - - min_control_value <- min(control_values) - max_control_value <- max(control_values) + multipliers <- dplyr::case_when( + units == "TB" ~ 1024 * 1024, + units == "GB" ~ 1024, + units == "MB" ~ 1, + units == "KB" ~ 1 / 1024, + units == "B" ~ 1 / 1024 / 1024, + TRUE ~ NA + ) - if (min_control_value == max_control_value) { - return(NA_real_) - } + (values * multipliers) |> + ceiling() |> + as.integer() +} - scaled <- (values - min_control_value) / (max_control_value - min_control_value) +missing_to_empty <- function( + values, + mode = c("character", "numeric", "integer") +) { + mode <- match.arg(mode) - if (maximize) { - scaled + if (is.null(values) || (length(values) == 1 && is.na(values))) { + switch( + mode, + character = character(0), + numeric = numeric(0), + integer = integer(0) + ) } else { - 1 - scaled + values } } -aggregate_scores <- function(scaled_score) { - mean(pmin(1, pmax(0, scaled_score)) %|% 0) -} -scores <- raw_scores %>% - complete( - dataset_id, - method_id, - metric_ids, - fill = list(metric_values = NA_real_) - ) %>% - left_join(method_info %>% select(method_id, is_baseline), by = "method_id") %>% - left_join(metric_info %>% select(metric_ids = metric_id, maximize), by = "metric_ids") %>% - group_by(metric_ids, dataset_id) %>% - mutate(scaled_score = scale_scores(metric_values, is_baseline, maximize[[1]]) %|% 0) %>% - group_by(dataset_id, method_id) %>% - summarise( - metric_values = list(as.list(setNames(metric_values, metric_ids))), - scaled_scores = list(as.list(setNames(scaled_score, metric_ids))), - mean_score = aggregate_scores(scaled_score), - .groups = "drop" - ) - -# read execution info -# -> only keep the last execution of each process -input_execution <- readr::read_tsv(par$input_execution) |> - group_by(name) |> - mutate(num_runs = n()) |> - slice(which.max(submit)) |> - ungroup() +map_missing_to_empty <- function( + values_list, + mode = c("character", "numeric") +) { + purrr::map(values_list, missing_to_empty, mode = mode) +} -method_lookup <- map_dfr(method_info$method_id, function(method_id) { - regex <- paste0("(.*:", method_id, ":[^ ]*)") - name <- - input_execution$name[grepl(regex, input_execution$name)] |> - unique() - name_ <- name[!grepl(":publishStatesProc", name)] - tibble(method_id = method_id, name = name_) -}) -dataset_lookup <- map_dfr(dataset_info$dataset_id, function(dataset_id) { - regex <- paste0(".*[(.](", dataset_id, ")[)./].*") - name <- - input_execution$name[grepl(regex, input_execution$name)] 
|> - unique() - tibble(dataset_id = dataset_id, name = name) -}) +################################################################################ +# MAIN SCRIPT +################################################################################ -# parse values -execution_info_ind <- input_execution |> - left_join(method_lookup, by = "name") |> - left_join(dataset_lookup, by = "name") |> - filter(!is.na(method_id)) %>% - rowwise() |> - mutate( - process_id = gsub(" .*", "", name), - submit = strptime(submit, "%Y-%m-%d %H:%M:%S"), - exit_code = parse_exit(exit), - duration_sec = parse_duration(realtime), - cpu_pct = parse_cpu(`%cpu`), - peak_memory_mb = parse_size(peak_vmem), - disk_read_mb = parse_size(rchar), - disk_write_mb = parse_size(wchar) - ) |> - ungroup() +cat("====== Get results ======\n") -execution_info <- execution_info_ind |> - group_by(dataset_id, method_id) |> - summarise( - resources = list(list( - submit = min(submit), - exit_code = max(exit_code), - duration_sec = sum(duration_sec), - cpu_pct = sum(cpu_pct * duration_sec) / sum(duration_sec), - peak_memory_mb = max(peak_memory_mb), - disk_read_mb = sum(disk_read_mb), - disk_write_mb = sum(disk_write_mb) - )), - .groups = "drop" +cat("\n>>> Reading input files...\n") +cat("Reading method info from '", par$input_method_info, "'...\n", sep = "") +method_info <- jsonlite::read_json(par$input_method_info) +cat("Reading dataset info from '", par$input_dataset_info, "'...\n", sep = "") +dataset_info <- jsonlite::read_json(par$input_dataset_info) +cat("Reading metric info from '", par$input_metric_info, "'...\n", sep = "") +metric_info <- jsonlite::read_json(par$input_metric_info) +cat("Reading scores from '", par$input_scores, "'...\n", sep = "") +scores <- yaml::yaml.load_file(par$input_scores) |> + purrr::map_dfr(\(.x) { + .x[c("dataset_id", "method_id", "metric_ids", "metric_values")] |> + tibble::as_tibble() + }) |> + dplyr::rename( + dataset_name = dataset_id, + method_name = method_id, + metric_name = metric_ids, + metric_value = metric_values ) - -# combine scores with execution info -# fill up missing entries with NAs and 0s -metric_ids <- unique(raw_scores$metric_ids) -rep_names <- function(val) { - setNames( - as.list(rep(val, length(metric_ids))), - metric_ids +cat("Reading execution trace from '", par$input_trace, "'...\n", sep = "") +method_names <- purrr::map_chr(method_info, "name") +metric_components <- unique(purrr::map_chr(metric_info, "component_name")) +trace <- readr::read_tsv( + par$input_trace, + col_types = readr::cols( + task_id = readr::col_integer(), + submit = readr::col_datetime(), + .default = readr::col_character(), + ), + na = c("", "-", "NA") +) |> + # Only keep the most recent run of each process + dplyr::group_by(name) |> + dplyr::slice_max(submit) |> + dplyr::ungroup() |> + # Separate process name and id + dplyr::mutate(name_copy = name) |> + tidyr::separate_wider_delim(name_copy, " ", names = c("process", "id")) |> + # Extract component from process name + dplyr::mutate( + component = purrr::map_chr(process, \(.process) { + rev(stringr::str_split(.process, ":")[[1]])[1] + }) + ) |> + dplyr::mutate(component = stringr::str_remove(component, "_process")) |> + # Only keep method and metric components + dplyr::filter( + component %in% method_names | component %in% metric_components + ) |> + dplyr::mutate(id = stringr::str_remove_all(id, "\\(|\\)")) |> + # Split ID into dataset, method, metric + tidyr::separate_wider_delim( + id, + delim = ".", + names = c("dataset_name", "method_name", 
"metric_component"), + too_few = "align_start" + ) |> + # Parse resources + dplyr::mutate( + run_exit_code = parse_exit_code(exit), + run_duration_secs = parse_duration(realtime), + run_cpu_pct = parse_cpu_pct(`%cpu`), + run_peak_memory_mb = parse_memory(peak_vmem), + run_disk_read_mb = parse_memory(rchar), + run_disk_write_mb = parse_memory(wchar) + ) |> + # Select columns + dplyr::select( + name, + process, + component, + dataset_name, + method_name, + metric_component, + tidyselect::starts_with("run_") ) -} -out <- full_join( - scores, - execution_info, - by = c("method_id", "dataset_id") -) %>% - rowwise() %>% - mutate( - task_id = par$task_id, - metric_values = list(metric_values %||% rep_names(NA_real_)), - scaled_scores = list(scaled_scores %||% rep_names(0)), - mean_score = mean_score %|% 0, - ) %>% - ungroup() +# Dataset names in the trace may have normalisations appended, map back to the name +dataset_names <- purrr::map_chr(dataset_info, "name") +process_datasets <- unique(trace$dataset_name) +dataset_map <- purrr::map_chr(process_datasets, function(.dataset) { + dataset_names[stringr::str_detect(.dataset, dataset_names)][1] +}) |> + purrr::set_names(process_datasets) +trace$dataset_name <- dataset_map[trace$dataset_name] -# --- process metric execution info -------------------------------------------- -cat("Processing metric execution info\n") - -# manually add component id to metric info -metric_info$component_name <- metric_info$component_name %||% rep(NA_character_, nrow(metric_info)) %|% - gsub(".*/([^/]*)/config\\.vsh\\.yaml", "\\1", metric_info$implementation_url) - -metric_lookup2 <- pmap_dfr(metric_info, function(metric_id, component_name, ...) { - regex <- paste0("(.*:", component_name, ":[^ ]*)") - name <- - input_execution$name[grepl(regex, input_execution$name)] |> - unique() - name_ <- name[!grepl(":publishStatesProc", name)] - tibble(metric_id = metric_id, component_name = component_name, name = name_) -}) -dataset_lookup2 <- map_dfr(dataset_info$dataset_id, function(dataset_id) { - regex <- paste0(".*[(.](", dataset_id, ")[)./].*") - name <- - input_execution$name[grepl(regex, input_execution$name)] |> - unique() - tibble(dataset_id = dataset_id, name = name) -}) -method_lookup2 <- map_dfr(method_info$method_id, function(method_id) { - regex <- paste0(".*[(.](", method_id, ")[)./].*") - name <- - input_execution$name[grepl(regex, input_execution$name)] |> - unique() - tibble(method_id = method_id, name = name) -}) +cat("\n>>> Extracting resources...\n") +cat("Extracting method resources...\n", sep = "") +method_resources <- trace |> + dplyr::filter(component %in% method_names) |> + dplyr::group_by(dataset_name, method_name) |> + dplyr::summarise( + run_exit_code = list(run_exit_code), + run_duration_secs = list(run_duration_secs), + run_cpu_pct = list(run_cpu_pct), + run_peak_memory_mb = list(run_peak_memory_mb), + run_disk_read_mb = list(run_disk_read_mb), + run_disk_write_mb = list(run_disk_write_mb), + .groups = "drop" + ) |> + dplyr::mutate( + succeeded = purrr::map_lgl(run_exit_code, ~ all(.x == 0)), + run_exit_code = map_missing_to_empty(run_exit_code, mode = "integer"), + run_duration_secs = map_missing_to_empty( + run_duration_secs, + mode = "numeric" + ), + run_cpu_pct = map_missing_to_empty(run_cpu_pct, mode = "numeric"), + run_peak_memory_mb = map_missing_to_empty( + run_peak_memory_mb, + mode = "numeric" + ), + run_disk_read_mb = map_missing_to_empty(run_disk_read_mb, mode = "numeric"), + run_disk_write_mb = map_missing_to_empty( + run_disk_write_mb, 
+ mode = "numeric" + ) + ) |> + dplyr::relocate(succeeded, .after = method_name) -metric_execution_info_ind <- input_execution |> - left_join(metric_lookup2, by = "name") |> - left_join(dataset_lookup2, by = "name") |> - left_join(method_lookup2, by = "name") |> - filter(!is.na(metric_id)) %>% - rowwise() |> - mutate( - process_id = gsub(" .*", "", name), - submit = strptime(submit, "%Y-%m-%d %H:%M:%S"), - exit_code = parse_exit(exit), - duration_sec = parse_duration(realtime), - cpu_pct = parse_cpu(`%cpu`), - peak_memory_mb = parse_size(peak_vmem), - disk_read_mb = parse_size(rchar), - disk_write_mb = parse_size(wchar) +cat("Extracting metric resources...\n", sep = "") +metric_resources <- trace |> + dplyr::filter(component %in% metric_components) |> + dplyr::group_by(dataset_name, method_name, metric_component) |> + dplyr::summarise( + run_exit_code = list(run_exit_code), + run_duration_secs = list(run_duration_secs), + run_cpu_pct = list(run_cpu_pct), + run_peak_memory_mb = list(run_peak_memory_mb), + run_disk_read_mb = list(run_disk_read_mb), + run_disk_write_mb = list(run_disk_write_mb), + .groups = "drop" ) |> - ungroup() + dplyr::mutate( + succeeded = purrr::map_lgl(run_exit_code, ~ all(.x == 0)), + run_exit_code = map_missing_to_empty(run_exit_code, mode = "integer"), + run_duration_secs = map_missing_to_empty( + run_duration_secs, + mode = "numeric" + ), + run_cpu_pct = map_missing_to_empty(run_cpu_pct, mode = "numeric"), + run_peak_memory_mb = map_missing_to_empty( + run_peak_memory_mb, + mode = "numeric" + ), + run_disk_read_mb = map_missing_to_empty(run_disk_read_mb, mode = "numeric"), + run_disk_write_mb = map_missing_to_empty( + run_disk_write_mb, + mode = "numeric" + ) + ) |> + dplyr::relocate(succeeded, .after = method_name) -metric_execution_info <- metric_execution_info_ind |> - group_by(dataset_id, method_id, metric_component_name = component_name) |> - summarise( - resources = list(list( - submit = min(submit), - exit_code = max(exit_code), - duration_sec = sum(duration_sec), - cpu_pct = sum(cpu_pct * duration_sec) / sum(duration_sec), - peak_memory_mb = max(peak_memory_mb), - disk_read_mb = sum(disk_read_mb), - disk_write_mb = sum(disk_write_mb) - )), +cat("\n>>> Summarising results...\n") +metric_component_names <- purrr::map_chr(metric_info, "component_name") +metric_component_map <- purrr::map_chr(metric_info, "name") |> + purrr::set_names(metric_component_names) +results <- scores |> + # There shouldn't be any but remove missing/NaN values just in case + dplyr::filter( + !is.na(metric_value) & is.finite(metric_value) + ) |> + dplyr::arrange(dataset_name, method_name, metric_name) |> + dplyr::group_by(dataset_name, method_name) |> + dplyr::summarise( + metric_names = list(metric_name), + metric_values = list(metric_value), .groups = "drop" + ) |> + dplyr::full_join(method_resources, by = c("dataset_name", "method_name")) |> + dplyr::mutate( + metric_components = purrr::map2( + dataset_name, + method_name, + function(.dataset, .method) { + metric_resources |> + dplyr::filter( + dataset_name == .dataset, + method_name == .method + ) |> + dplyr::mutate( + metric_names = purrr::map(metric_component, function(.component) { + metric_component_map[names(metric_component_map) == .component] + }) + ) |> + dplyr::select( + component_name = metric_component, + metric_names, + succeeded, + tidyselect::starts_with("run_") + ) + } + ) + ) |> + # TODO: Add these once available in output + dplyr::mutate( + paramset_name = NA, + paramset = NA + ) |> + dplyr::mutate( + metric_names = 
map_missing_to_empty(metric_names, mode = "character"), + metric_values = map_missing_to_empty(metric_values, mode = "numeric") + ) |> + dplyr::select( + dataset_name, + method_name, + paramset_name, + paramset, + succeeded, + tidyselect::starts_with("run_"), + metric_names, + metric_values, + metric_components ) +dplyr::glimpse(results) -# --- write output files ------------------------------------------------------- -cat("Writing output files\n") -# write output files +cat("\n>>> Writing output files...\n") +cat("Writing results to '", par$output, "'...\n", sep = "") jsonlite::write_json( - purrr::transpose(out), - par$output_results, - auto_unbox = TRUE, - pretty = TRUE + results, + par$output, + pretty = TRUE, + null = "null", + na = "null" ) -jsonlite::write_json( - purrr::transpose(metric_execution_info), - par$output_metric_execution_info, - auto_unbox = TRUE, - pretty = TRUE + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "results.json"), + "-d", + par$output ) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/get_task_info/config.vsh.yaml b/src/reporting/get_task_info/config.vsh.yaml index 0798159ee..408147d3e 100644 --- a/src/reporting/get_task_info/config.vsh.yaml +++ b/src/reporting/get_task_info/config.vsh.yaml @@ -1,35 +1,61 @@ name: get_task_info namespace: reporting -description: Extract task info -arguments: - - name: --input - type: file - description: A yaml file - required: true - example: resources_test/openproblems/task_results_v3/raw/task_info.yaml - - name: --output - type: file - direction: output - default: output.json - description: Output json - info: - format: - type: json - # TODO: add schema +description: Convert task info YAML to schema-compliant JSON + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: Task info YAML file + required: true + example: resources_test/openproblems/task_results_v4/raw/task_info.yaml + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + default: task_info.json + description: Output JSON file matching task info schema + info: + format: + type: json + schema: /common/schemas/results_v4/task_info.json + example: resources_test/openproblems/task_results_v4/processed/task_info.json + resources: - type: r_script path: script.R + - path: /src/reporting/shared/functions.R + dest: functions.R + - path: /common/schemas + dest: schemas + - path: /src/reporting/shared/bibliography.bib + dest: bibliography.bib + test_resources: - type: python_script path: /common/component_tests/run_and_check_output.py - - path: /resources_test/openproblems/task_results_v3 - dest: resources_test/openproblems/task_results_v3 + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + engines: - type: docker image: openproblems/base_r:1 setup: + - type: apt + packages: + - nodejs + - npm + - type: docker + run: npm install -g ajv-cli - type: r - cran: [ purrr, yaml, rlang, processx ] + cran: + - bibtex + - purrr + - stringr + runners: - type: executable - type: 
nextflow diff --git a/src/reporting/get_task_info/script.R b/src/reporting/get_task_info/script.R index 5e22fe485..9f1b249ec 100644 --- a/src/reporting/get_task_info/script.R +++ b/src/reporting/get_task_info/script.R @@ -1,57 +1,83 @@ -requireNamespace("jsonlite", quietly = TRUE) -requireNamespace("yaml", quietly = TRUE) -library(purrr, warn.conflicts = FALSE) -library(rlang, warn.conflicts = FALSE) - -## VIASH START +### VIASH START par <- list( - input = "resources_test/openproblems/task_results_v3/raw/task_info.yaml", - output = "resources_test/openproblems/task_results_v3/processed/task_info.json" + input = "resources_test/openproblems/task_results_v4/raw/task_info.yaml", + output = "task_info.json" ) ## VIASH END -info <- yaml::yaml.load_file(par$input) -# ↑ this could be used as the new format - -# construct v1 format -repo <- - if ("links" %in% names(info) && "repository" %in% names(info$links)) { - info$links$repository - } else if ("name" %in% names(info) && "organization" %in% names(info)) { - paste0(info$organization, "/", info$name) - } else { - "openproblems-bio/openproblems" - } -description <- - if ("motivation" %in% names(info)) { - paste0(info$motivation, "\n\n", info$description) - } else { - info$description - } -out <- list( - task_id = info$name, - commit_sha = NA_character_, - task_name = info$label, - task_summary = info$summary, - task_description = description, - repo = repo, - issue_tracker = info$links$issue_tracker %||% NA_character_, - authors = info$authors, - version = info$version, - license = info$license %||% NA_character_ +source(file.path(meta$resources_dir, "functions.R")) + +cat("====== Get task info ======\n") + +`%||%` <- rlang::`%||%` +cat("\n>>> Reading input files...\n") +cat("Reading task info from '", par$input, "'...\n", sep = "") +task_info_yaml <- yaml::read_yaml(par$input) + +cat("\n>>> Getting references...\n") +bibliography <- read_bibliography( + file.path(meta$resources_dir, "bibliography.bib") ) +references <- get_references_list(task_info_yaml$references, bibliography) +str(references) -# show warning when certain data is missing and return null? 
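read_bibliography() and get_references_list() come from the shared functions.R, which is not part of this diff. Assuming the helper is a thin wrapper around the bibtex package (added to the engine setup above), resolving reference IDs against the bundled bibliography could look roughly like this; treat it as a sketch, not the actual implementation:

bib <- bibtex::read.bib("bibliography.bib")  # bundled as a component resource
names(bib)        # citation keys, which the configs refer to as reference IDs
format(bib[[1]])  # plain-text rendering of the first entry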
-for (n in names(out)) { - if (is.null(out[[n]])) { - out_as_str <- jsonlite::toJSON(out, auto_unbox = TRUE, pretty = TRUE) - stop("missing value for value '", n, "' in ", out_as_str) - } -} +cat("\n>>> Getting authors...\n") +authors <- get_authors_list(task_info_yaml$authors) +cat("Found", length(authors), "authors\n") + +cat("\n>>> Creating JSON list...\n") +task_info_json <- list( + name = jsonlite::unbox(sub("^task_", "", task_info_yaml$name)), # Remove "task_" prefix + commit = jsonlite::unbox(NA_character_), # TODO: Add when available in task_info.yaml + label = jsonlite::unbox(task_info_yaml$label), + summary = task_info_yaml$summary |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + description = task_info_yaml$description |> + stringr::str_trim() |> + stringr::str_remove_all('(^"|"$|^\'|\'$)') |> + jsonlite::unbox(), + repository = jsonlite::unbox(task_info_yaml$links$repository), + authors = authors, + license = jsonlite::unbox(task_info_yaml$license), + references = references, + version = jsonlite::unbox(task_info_yaml$version), + is_prerelease = jsonlite::unbox(TRUE) +) +str(task_info_json) +cat("\n>>> Writing output files...\n") +cat("Writing task info to '", par$output, "'...\n", sep = "") jsonlite::write_json( - out, + task_info_json, par$output, - auto_unbox = TRUE, - pretty = TRUE + pretty = TRUE, + null = "null" ) + +cat("\n>>> Validating output against schema...\n") +results_schemas <- file.path(meta$resources_dir, "schemas", "results_v4") +ajv_args <- paste( + "validate", + "--spec draft2020", + "-s", + file.path(results_schemas, "task_info.json"), + "-r", + file.path(results_schemas, "core.json"), + "-d", + par$output +) + +cat("Running validation command:", "ajv", ajv_args, "\n") +cat("Output:\n") +validation_result <- system2("ajv", ajv_args) + +if (validation_result == 0) { + cat("JSON validation passed successfully!\n") +} else { + cat("JSON validation failed!\n") + stop("Output JSON does not conform to schema") +} + +cat("\n>>> Done!\n") diff --git a/src/reporting/process_task_results/config.vsh.yaml b/src/reporting/process_task_results/config.vsh.yaml index 60b687615..e1703bf52 100644 --- a/src/reporting/process_task_results/config.vsh.yaml +++ b/src/reporting/process_task_results/config.vsh.yaml @@ -1,84 +1,136 @@ name: process_task_results namespace: reporting -description: >- - This workflow transforms the meta information of the results into a format +description: | + This workflow summarises and collects the output from a task run in a format that can be used by the website. 
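One detail worth noting from the get_*_info scripts above: summaries and descriptions are trimmed and stripped of a single surrounding quote pair before being unboxed, so that YAML quoting does not leak into the JSON. In isolation (the string is a made-up example):

raw_summary <- "  \"Remove unwanted batch effects while retaining biological variation\"  "
raw_summary |>
  stringr::str_trim() |>
  stringr::str_remove_all('(^"|"$|^\'|\'$)') |>
  jsonlite::unbox()
# -> a scalar string without the surrounding quotes or padding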
+ argument_groups: - name: Inputs arguments: - - name: "--input_scores" + - name: "--input_task_info" type: file required: true direction: input - description: A yaml file containing the scores of each of the methods - example: score_uns.yaml - - name: "--input_method_configs" + description: A YAML file containing task information + example: resources_test/openproblems/task_results_v4/raw/task_info.yaml + - name: "--input_dataset_info" type: file required: true direction: input - example: method_configs.yaml - - name: "--input_metric_configs" + description: A YAML file containing dataset information + example: resources_test/openproblems/task_results_v4/raw/dataset_info.yaml + - name: "--input_method_configs" type: file required: true direction: input - example: metric_configs.yaml - - name: "--input_dataset_info" + description: A YAML file containing method configurations + example: resources_test/openproblems/task_results_v4/raw/method_configs.yaml + - name: "--input_metric_configs" type: file required: true direction: input - example: dataset_info.yaml - - name: "--input_execution" + description: A YAML file containing metric configurations + example: resources_test/openproblems/task_results_v4/raw/metric_configs.yaml + - name: "--input_scores" type: file required: true direction: input - example: trace.txt - - name: "--input_task_info" + description: A YAML file containing the scores of each of the methods + example: resources_test/openproblems/task_results_v4/raw/score_uns.yaml + - name: "--input_trace" type: file required: true direction: input - example: task_info.yaml + description: Nextflow execution trace file + example: resources_test/openproblems/task_results_v4/raw/trace.txt + - name: Outputs arguments: - - name: "--output_scores" + - name: "--output_combined" type: file required: true direction: output - description: A yaml file containing the scores of each of the methods - default: results.json - - name: "--output_method_info" + description: Combined task results JSON file + default: combined_output.json + info: + format: + type: json + schema: /common/schemas/results_v4/task_results.json + - name: "--output_report" type: file required: true direction: output - default: method_info.json - - name: "--output_metric_info" + description: HTML run report + default: report.html + info: + format: + type: html + - name: "--output_task_info" type: file required: true direction: output - default: metric_info.json + description: Task info JSON file + default: task_info.json + info: + format: + type: json + schema: /common/schemas/results_v4/task_info.json - name: "--output_dataset_info" type: file required: true direction: output + description: Dataset info JSON file default: dataset_info.json - - name: "--output_task_info" + info: + format: + type: json + schema: /common/schemas/results_v4/dataset_info.json + - name: "--output_method_info" type: file required: true direction: output - default: task_info.json - - name: "--output_qc" + description: Method info JSON file + default: method_info.json + info: + format: + type: json + schema: /common/schemas/results_v4/method_info.json + - name: "--output_metric_info" type: file required: true direction: output - default: quality_control.json - - name: "--output_metric_execution_info" + description: Metric info JSON file + default: metric_info.json + info: + format: + type: json + schema: /common/schemas/results_v4/metric_info.json + - name: "--output_results" type: file required: true direction: output - default: metric_execution_info.json 
+ description: Results JSON file + default: results.json + info: + format: + type: json + schema: /common/schemas/results_v4/results.json + - name: "--output_quality_control" + type: file + required: true + direction: output + description: Quality control JSON file + default: quality_control.json + info: + format: + type: json + schema: /common/schemas/results_v4/quality_control.json + resources: - type: nextflow_script path: main.nf entrypoint: run_wf + dependencies: - name: reporting/get_results - name: reporting/get_method_info @@ -86,5 +138,8 @@ dependencies: - name: reporting/get_dataset_info - name: reporting/get_task_info - name: reporting/generate_qc + - name: reporting/combine_output + - name: reporting/render_report + runners: - - type: nextflow \ No newline at end of file + - type: nextflow diff --git a/src/reporting/process_task_results/main.nf b/src/reporting/process_task_results/main.nf index fc85605ff..1fc64f389 100644 --- a/src/reporting/process_task_results/main.nf +++ b/src/reporting/process_task_results/main.nf @@ -1,3 +1,10 @@ +workflow auto { + findStates(params, meta.config) + | meta.workflow.run( + auto: [publish: "state"] + ) +} + workflow run_wf { take: input_ch @@ -18,62 +25,81 @@ workflow run_wf { [id, state + ["task_id": task_id]] } + | get_dataset_info.run( + fromState: [ + "input": "input_dataset_info", + ], + toState: ["output_dataset": "output"] + ) + | get_method_info.run( - fromState: [ + fromState: [ "input": "input_method_configs", ], toState: ["output_method": "output"] ) | get_metric_info.run( - fromState: [ + fromState: [ "input": "input_metric_configs", ], toState: ["output_metric": "output"] ) - | get_dataset_info.run( - fromState: [ - "input": "input_dataset_info", - ], - toState: ["output_dataset": "output"] - ) - | get_results.run( - fromState: [ + fromState: [ "input_scores": "input_scores", - "input_execution": "input_execution", + "input_trace": "input_trace", "input_dataset_info": "output_dataset", "input_method_info": "output_method", "input_metric_info": "output_metric" ], toState: [ - "output_results": "output_results", - "output_metric_execution_info": "output_metric_execution_info" + "output_results": "output" ] ) | generate_qc.run( fromState: [ - "task_info": "output_task", - "method_info": "output_method", - "metric_info": "output_metric", - "dataset_info": "output_dataset", - "results": "output_results" + "input_task_info": "output_task", + "input_dataset_info": "output_dataset", + "input_method_info": "output_method", + "input_metric_info": "output_metric", + "input_results": "output_results" ], toState: ["output_qc": "output"] ) + | combine_output.run( + fromState: [ + "input_task_info": "output_task", + "input_quality_control": "output_qc", + "input_metric_info": "output_metric", + "input_method_info": "output_method", + "input_dataset_info": "output_dataset", + "input_results": "output_results" + ], + toState: ["output_combined": "output"] + ) + + | render_report.run( + fromState: [ + "input_task_results": "output_combined" + ], + toState: ["output_report": "output"] + ) + | setState([ - "output_scores": "output_results", + "output_combined": "output_combined", + "output_report": "output_report", + "output_task_info": "output_task", + "output_dataset_info": "output_dataset", "output_method_info": "output_method", "output_metric_info": "output_metric", - "output_dataset_info": "output_dataset", - "output_task_info": "output_task", - "output_qc": "output_qc", - "output_metric_execution_info": "output_metric_execution_info" + 
"output_results": "output_results", + "output_quality_control": "output_qc" ]) emit: output_ch -} \ No newline at end of file +} diff --git a/src/reporting/render_report/config.vsh.yaml b/src/reporting/render_report/config.vsh.yaml new file mode 100644 index 000000000..8864c7978 --- /dev/null +++ b/src/reporting/render_report/config.vsh.yaml @@ -0,0 +1,82 @@ +name: render_report +namespace: reporting +description: Render a HTML report summarizing the results + +argument_groups: + - name: Inputs + arguments: + - name: --input_task_results + type: file + description: Combined task results JSON file + info: + format: + type: json + schema: /common/schemas/results_v4/combined_output.json + required: true + example: resources_test/openproblems/task_results_v4/processed/combined_output.json + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: HTML report file + default: report.html + info: + format: + type: html + +resources: + - type: r_script + path: script.R + - path: /src/reporting/render_report/report-template.qmd + dest: report.qmd + - path: /src/reporting/render_report/report-functions.R + dest: functions.R + - path: /src/reporting/render_report/logo.svg + dest: logo.svg + +test_resources: + - type: python_script + path: /common/component_tests/run_and_check_output.py + - path: /resources_test/openproblems/task_results_v4 + dest: resources_test/openproblems/task_results_v4 + +engines: + - type: docker + image: openproblems/base_r:1.0.0 + setup: + - type: docker + run: | + export QUARTO_VERSION="1.7.32" && \ + mkdir -p /opt/quarto/${QUARTO_VERSION} && \ + wget -O quarto.tar.gz "https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.tar.gz" && \ + tar -zxvf quarto.tar.gz -C "/opt/quarto/${QUARTO_VERSION}" --strip-components=1 && \ + ln -s /opt/quarto/${QUARTO_VERSION}/bin/quarto /usr/local/bin/quarto && \ + rm quarto.tar.gz + - type: r + cran: + - commonmark + - dplyr + - funkyheatmap + - ggplot2 + - here + - htmltools + - jsonlite + - knitr + - patchwork + - purrr + - quarto + - rcrossref + - reactable + - scales + - stringr + - tibble + - tidyr + - xfun + +runners: + - type: executable + - type: nextflow + directives: + label: [lowmem, lowtime, lowcpu] diff --git a/src/reporting/render_report/logo.svg b/src/reporting/render_report/logo.svg new file mode 100644 index 000000000..70228226e --- /dev/null +++ b/src/reporting/render_report/logo.svg @@ -0,0 +1,54 @@ + + + + + + + diff --git a/src/reporting/render_report/report-functions.R b/src/reporting/render_report/report-functions.R new file mode 100644 index 000000000..fcb3fea28 --- /dev/null +++ b/src/reporting/render_report/report-functions.R @@ -0,0 +1,549 @@ +# Tables ---- + +#' Get authors table +#' +#' @param authors Authors list from results JSON +#' +#' @returns A `reactable` table containing the authors +get_authors_table <- function(authors) { + authors_data <- purrr::map_dfr(authors, function(.author) { + other_info <- purrr::map_chr(names(.author$info), \(.info) { + paste(.info, toString(.author$info[[.info]]), sep = ": ") + }) |> + paste(collapse = ", ") + + data.frame( + name = .author$name, + roles = paste(.author$roles, collapse = ", "), + github = .author$github %||% NA_character_, + orcid = .author$orcid %||% NA_character_, + info = other_info + ) + }) + + colnames(authors_data) <- stringr::str_to_sentence(colnames(authors_data)) + reactable::reactable( + authors_data, + columns = list( + Roles = 
reactable::colDef(name = "Roles"), + Github = reactable::colDef( + name = "GitHub", + cell = function(value, index, column) { + if (!is.na(value)) { + paste0("", value, "") + } else { + "" + } + }, + style = list("font-family" = "monospace"), + html = TRUE + ), + Orcid = reactable::colDef( + name = "ORCiD", + cell = function(value, index, column) { + if (!is.na(value)) { + paste0("", value, "") + } else { + "" + } + }, + html = TRUE + ) + ), + striped = TRUE, + sortable = FALSE + ) +} + +#' Get references table +#' +#' @param references References list from results JSON +#' +#' @returns A `reactable` table containing the references +#' +#' @details +#' Information for DOI references is retrieved from CrossRef. BibTeX references +#' are formatted as code and ID references are shown as IDs. +get_references_table <- function(references) { + if (all(c("doi", "bibtex") %in% names(references))) { + references_df <- data.frame( + reference_type = character(0), + reference = character(0) + ) + + dois <- references$doi + if (!(is.null(dois) || length(dois) == 0)) { + doi_strs <- unlist(rcrossref::cr_cn(references$doi, format = "text")) + references_df <- dplyr::bind_rows( + references_df, + data.frame( + reference_type = "DOI", + reference = doi_strs + ) + ) + } + + bibtex <- references$bibtex + if (!(is.null(bibtex) || length(bibtex) == 0)) { + bibtex_strs <- purrr::map_chr(bibtex, function(.bibtex) { + prettify_bibtex(.bibtex, output = "html") + }) + references_df <- dplyr::bind_rows( + references_df, + data.frame( + reference_type = "BibTeX", + reference = bibtex_strs + ) + ) + } + } else { + references_df <- data.frame( + reference_type = "ID", + reference = unlist(references) + ) + } + + reactable::reactable( + references_df, + columns = list( + reference = reactable::colDef( + name = "References", + cell = function(value, index, column) { + reference_type <- references_df$reference_type[[index]] + + if (reference_type == "ID") { + paste("ID:", value) + } else { + value + } + }, + style = function(value, row) { + reference_type <- references_df$reference_type[[row]] + + if (reference_type == "BibTeX") { + list("font-family" = "monospace") + } else if (reference_type == "ID") { + list("font-family" = "monospace") + } + }, + html = TRUE + ), + reference_type = reactable::colDef(show = FALSE) + ), + striped = TRUE, + sortable = FALSE + ) +} + +#' Get source table +#' +#' @param details_df A data frame containing details +#' @param source_columns A character vector of column names to include in the +#' source table +#' +#' @returns A `reactable` table containing the source information +#' +#' @details +#' The source columns are formatted as monospace text +get_source_table <- function(details_df, source_columns) { + source_df <- details_df[, source_columns, drop = FALSE] + + reactable::reactable( + source_df, + columns = purrr::map(names(source_columns), function(.label) { + reactable::colDef( + name = .label, + style = list("font-family" = "monospace") + ) + }) |> + purrr::set_names(source_columns), + sortable = FALSE + ) +} + + +#' Get description table +#' +#' @param description_df A data frame containing the description information +#' +#' @returns A `reactable` table containing the description +#' +#' @details +#' The description Markdown is rendered as HTML +get_description_table <- function(description_df) { + reactable::reactable( + description_df, + columns = list( + description = reactable::colDef( + name = "Description", + cell = function(value) { + commonmark::markdown_html(value) + }, 
+ html = TRUE + ) + ), + sortable = FALSE + ) +} + +#' Get links table +#' +#' @param details_df A data frame containing details +#' @param link_columns A character vector of column names to include in the +#' links table +#' +#' @returns A `reactable` table containing the links +#' +#' @details +#' The link columns are formatted as HTML links +get_links_table <- function(details_df, link_columns) { + links_df <- details_df[, link_columns, drop = FALSE] + + reactable::reactable( + links_df, + columns = purrr::map(names(link_columns), function(.label) { + reactable::colDef( + name = .label, + cell = format_html_link, + html = TRUE + ) + }) |> + purrr::set_names(link_columns), + sortable = FALSE + ) +} + +#' Get additional information table +#' +#' @param additional_info A list containing additional information to display in +#' a table +#' +#' @returns A `reactable` table containing the additional information or a HTML +#' div with a message +#' +#' @details +#' Nicer heading are created from the column names, otherwise values are shown +#' as given. The additional information can contain any fields so we cannot +#' handle them specifically. +#' +#' If there are is no additional information, a div containing a message is +#' returned. A message is also returned if the additional information fails to +#' render. +get_additional_info_table <- function(additional_info) { + if (is.null(additional_info) || length(additional_info) == 0) { + return(htmltools::div( + "No additional information found", + style = "padding: 0.5rem" + )) + } + + tryCatch( + { + additional_data <- additional_info |> + purrr::map(\(.x) { + paste(.x, collapse = ", ") + }) |> + as.data.frame() + + colnames(additional_data) <- colnames(additional_data) |> + stringr::str_replace_all("_", " ") |> + stringr::str_to_sentence() + + reactable::reactable( + additional_data + ) + }, + error = function(e) { + htmltools::div( + paste( + "Additional information failed to render with error: ", + e$message + ), + style = "padding: 0.5rem", + ) + } + ) +} + +#' Get quality control table +#' +#' @param qc_df A data frame containing quality control information +#' +#' @returns A `reactable` table containing the quality control checks +get_qc_table <- function(qc_df) { + reactable::reactable( + qc_df[, c("label", "severity")], + + columns = list( + label = reactable::colDef(name = "Check"), + severity = reactable::colDef( + name = "Severity", + cell = function(value) { + switch(value, "1" = "❌", "2" = "❌❌", "3" = "❌❌❌", ) + } + ) + ), + + details = function(index, column) { + details_df <- qc_df[index, , drop = FALSE] + + details_table <- reactable::reactable( + details_df[, c("value", "condition", "severity_value")], + columns = list( + value = reactable::colDef( + name = "Value", + format = reactable::colFormat(digits = 2), + width = 100 + ), + condition = reactable::colDef( + name = "Condition", + style = list("font-family" = "monospace") + ), + severity_value = reactable::colDef( + name = "Severity value", + format = reactable::colFormat(digits = 2) + ) + ), + sortable = FALSE + ) + + message_table <- reactable::reactable( + details_df[, "message", drop = FALSE], + columns = list( + message = reactable::colDef( + name = "Message", + cell = function(value) { + stringr::str_replace_all( + value, + "\n", + "
" + ) + }, + html = TRUE + ) + ), + sortable = FALSE + ) + + htmltools::div( + style = "padding: 1rem", + details_table, + message_table + ) + }, + + striped = TRUE, + highlight = TRUE, + defaultSorted = "severity", + defaultSortOrder = "desc", + defaultPageSize = 25, + showPageSizeOptions = TRUE, + + rowStyle = reactable::JS( + "function(rowInfo) { + return { + borderLeft: '2px solid #104E8B', + fontWeight: 400 + } + }" + ) + ) +} + +# Plotting ---- + +#' Plot scaling +#' +#' @param complete_scores A long data frame containing all scaled metric scores +#' @param sel_metric The metric to plot +#' @param method_details A data frame containing method details +#' @param metric_details A data frame containing metric details +#' +#' @returns A `ggplot` object showing the scaling of the selected metric +#' +#' @details +#' Creates a normalization plot showing scaling of metric values, highlighting +#' values outside the [0, 1] range. A main panel shows all datasets and a +#' secondary panel is faceted by dataset. +plot_scaling <- function( + complete_scores, + sel_metric, + method_details, + metric_details +) { + plot_data <- complete_scores |> + dplyr::filter(metric == sel_metric) |> + dplyr::mutate( + method = factor( + method, + levels = method_details$method, + labels = method_details$method_label + ), + method_type = factor( + method_type, + levels = sort(unique(method_type)), + labels = sort(unique(method_type)) |> + stringr::str_replace_all("_", " ") |> + stringr::str_to_sentence() + ), + ) + + norm_plot <- ggplot2::ggplot( + plot_data, + ggplot2::aes(x = scaled_value, y = method) + ) + + ggplot2::annotate( + geom = "rect", + xmin = -Inf, + xmax = 0, + ymin = -Inf, + ymax = Inf, + fill = "red", + alpha = 0.1 + ) + + ggplot2::annotate( + geom = "rect", + xmin = 1, + xmax = Inf, + ymin = -Inf, + ymax = Inf, + fill = "red", + alpha = 0.1 + ) + + ggplot2::geom_vline( + xintercept = c(0, 1), + linetype = "dashed", + colour = "red" + ) + + ggplot2::geom_path(ggplot2::aes(group = dataset)) + + ggplot2::geom_point(ggplot2::aes(colour = method_type)) + + ggplot2::scale_y_discrete(limits = rev) + + ggplot2::scale_colour_brewer(palette = "Set1") + + ggplot2::labs(x = "Scaled value") + + ggplot2::theme_minimal() + + ggplot2::theme( + panel.border = ggplot2::element_rect(fill = NA), + legend.position = "bottom", + legend.title = ggplot2::element_blank(), + axis.title.y = ggplot2::element_blank() + ) + + norm_facets <- norm_plot + + ggplot2::facet_wrap( + ~dataset, + scales = "free_x", + labeller = ggplot2::as_labeller( + \(.x) { + stringr::str_wrap(.x, width = 10, whitespace_only = FALSE) + } + ) + ) + + norm_panel <- patchwork::wrap_plots( + norm_plot + ggplot2::labs(title = "Overall"), + norm_facets + ggplot2::labs(title = "By dataset"), + ncol = 1, + guides = "collect" + ) & + ggplot2::theme(legend.position = "bottom") + + norm_panel + + patchwork::plot_annotation( + title = metric_details$metric_label[metric_details$metric == sel_metric], + ) +} + +# Formatting ---- + +#' Prettify BibTeX +#' +#' @param bibtex BibTeX string to prettify +#' @param output Output format, either "md" for Markdown or "html" for HTML +#' +#' @returns A prettified BibTeX string formatted for the specified output +prettify_bibtex <- function(bibtex, output = c("md", "html")) { + output <- match.arg(output) + + newline <- switch( + output, + md = "\n", + html = "
" + ) + + bibtex_str <- bibtex |> + stringr::str_squish() |> + stringr::str_replace(", ", paste0(",", newline, " ")) |> + stringr::str_replace_all("\\}, ", paste0("\\},", newline, " ")) |> + stringr::str_replace("\\s?\\}$", paste0(newline, "\\}")) + + if (output == "html") { + bibtex_str <- paste0("
", bibtex_str, "
") + } + + bibtex_str +} + +#' Format HTML link +#' +#' @param value The URL to format as an HTML link +#' +#' @returns A string containing the HTML link +format_html_link <- function(value) { + paste0("", value, "") +} + +#' Label memory +#' +#' @param x_mb A numeric vector of memory sizes in megabytes (MB) +#' @param include_mb A logical indicating whether to include label values less +#' than 1 GB +#' +#' @returns A character vector with memory labels +label_memory <- function(x_mb, include_mb = TRUE) { + dplyr::case_when( + is.na(x_mb) | x_mb < 0 ~ "NA", + x_mb < 1 ~ "<1M", + x_mb < 1e3 & !include_mb ~ "<1G", + x_mb < 1e3 ~ paste0(round(x_mb), "M"), + x_mb < 1e6 ~ paste0(round(x_mb / 1e3), "G"), + x_mb < 1e9 ~ paste0(round(x_mb / 1e6), "T"), + TRUE ~ ">1P" + ) +} + +#' Label time +#' +#' @param time A numeric vector of time values in seconds +#' +#' @returns A character vector with time labels +label_time <- function(time) { + dplyr::case_when( + is.na(time) | time < 0 ~ "NA", + time < 1e-5 ~ "0s", + time < 1 ~ "<1s", + time < 60 ~ paste0(floor(time), "s"), + time < 3600 ~ paste0(floor(time / 60), "m"), + time < 3600 * 24 ~ paste0(floor(time / 3600), "h"), + time < 3600 * 24 * 7 ~ paste0(floor(time / 3600 / 24), "d"), + TRUE ~ ">7d" + ) +} + +# Helpers ---- + +#' Aggregate scores +#' +#' @param scores A vector of scores to aggregate +#' +#' @returns An aggregated mean score +#' +#' @details +#' Values are restricted to between 0 and 1 and missing values are replaced by +#' 0. For use in creating the summary FunkyHeatmap +aggregate_scores <- function(scores) { + scores[is.na(scores)] <- 0 + scores[scores < 0] <- 0 + scores[scores > 1] <- 1 + + mean(scores, na.rm = TRUE) +} diff --git a/src/reporting/render_report/report-template.qmd b/src/reporting/render_report/report-template.qmd new file mode 100644 index 000000000..133d261b4 --- /dev/null +++ b/src/reporting/render_report/report-template.qmd @@ -0,0 +1,1217 @@ +--- +title: "Open Problems task run report" +date: today + +format: + html: + theme: cosmo + toc: true + toc-depth: 2 + embed-resources: true + grid: + body-width: 1000px + +brand: + logo: + medium: favicon.svg + + color: + palette: + black: "#1A1A1A" + white: "#FFFFFF" + blue: "#165AE3" + foreground: black + background: white + primary: blue + + typography: + fonts: + - family: DM Sans + source: google + - family: Plus Jakarta Sans + source: google + + base: DM Sans + headings: + family: Plus Jakarta Sans + weight: 400 + +lightbox: true +number-sections: true + +execute: + echo: false + +knitr: + opts_chunk: + out.width: "100%" + +params: + task_results_json: task_results.json + logo: logo.svg + functions: functions.R +--- + +```{r params, eval=FALSE} +params <- list( + task_results_json = "resources_test/openproblems/task_results_v4/processed/combined_output.json", + logo = "src/reporting/render_report/logo.svg", + functions = "src/reporting/render_report/report-functions.R" +) +``` + +```{r} +#| label: source +source(params$functions) + +`%||%` <- rlang::`%||%` +``` + +```{r} +#| label: load +task_results <- jsonlite::read_json( + params$task_results_json, + simplifyVector = FALSE, + simplifyDataFrame = FALSE +) +``` + +![](`r params$logo`){fig-align="center" width=80%} + +# Introduction + +This report displays and summarizes the output from an Open Problems task run. +You can use it to check the results before they are uploaded to the Open Problems website. 
+ +Please pay particular attention to **@sec-normalization Normalization** and **@sec-quality-control Quality control** to see if there are any issues with the task run. + +::: {.callout-caution} +The results in this report are preliminary and may be slightly different to what +are displayed in the final version on the Open Problems website +::: + +# Task information + +::: {.callout-note} +This section displays the task information as provided in the task `_viash.yaml` file +::: + +```{r} +#| label: task-info +task_info <- task_results$task_info +``` + +## Summary + +**Task:** `r task_info$label` + +`r task_info$summary` + +```{r} +#| label: task-repo_str +task_repo_str <- stringr::str_remove(task_info$repository, "https://github.com/") +``` + +**Repository:** [``r task_repo_str``](`r task_info$repository`) + +**License:** `r task_info$license` + +```{r} +#| label: task-info-version +task_version <- task_info$version +task_prerelease <- ifelse(task_info$is_prerelease, "(Pre-release)", "") +``` + +**Version:** ``r task_version`` `r task_prerelease` + +`r if(!is.null(task_info$commit)) paste0("**Commit:** \x60", task_info$commit, "\x60")` + +## Description + +`r task_info$description` + +## Authors + +```{r} +#| label: task-authors +get_authors_table(task_info$authors) +``` + +## References + +```{r} +#| label: task-references +get_references_table(task_info$references) +``` + +# Dataset information + +::: {.callout-note} +This section displays the dataset information as provided in the dataset `config.vsh.yaml` files. + +Expand each row of the table for more details. +::: + +```{r} +#| label: dataset-info +dataset_info <- task_results$dataset_info + +dataset_summary <- purrr::map_dfr(dataset_info, function(.dataset) { + data.frame( + dataset = .dataset$name, + label = .dataset$label, + summary = .dataset$summary + ) +}) + +dataset_details <- purrr::map_dfr(dataset_info, function(.dataset) { + data.frame( + description = .dataset$description, + modalities = paste(.dataset$modalities, collapse = ", "), + organisms = paste(.dataset$organisms, collapse = ", "), + file_size_mb = .dataset$file_size_mb, + commit = .dataset$commit, + source_url = .dataset$source_url, + common_dataset_names = paste(.dataset$common_dataset_names, collapse = ", "), + date_created = .dataset$date_created + ) +}) + +detail_columns <- c("modalities","organisms", "file_size_mb") +source_columns <- c("commit", "source_url", "common_dataset_names", "date_created") + +reactable::reactable( + dataset_summary, + columns = list( + dataset = reactable::colDef( + name = "Dataset", + style = list("font-family" = "monospace"), + html = TRUE + ), + label = reactable::colDef(name = "Label"), + summary = reactable::colDef(name = "Summary") + ), + + details = function(index, column) { + description_table <- get_description_table( + dataset_details[index, c("description"), drop = FALSE] + ) + + details_table <- reactable::reactable( + dataset_details[index, detail_columns], + columns = list( + modalities = reactable::colDef(name = "Modalities"), + organisms = reactable::colDef(name = "Organisms"), + file_size_mb = reactable::colDef( + name = "File size (MB)", + format = reactable::colFormat(digits = 2) + ) + ), + sortable = FALSE + ) + + source_table <- reactable::reactable( + dataset_details[index, source_columns], + columns = list( + commit = reactable::colDef( + name = "Commit", + style = list("font-family" = "monospace") + ), + source_url = reactable::colDef( + name = "Source URL", + cell = format_html_link, + html = TRUE + ), + 
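      # format_html_link() (defined in the report functions) turns the raw URL
      # into an HTML link, which is why html = TRUE is set on this column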
common_dataset_names = reactable::colDef(name = "Common datasets"), + date_created = reactable::colDef(name = "Date created") + ), + sortable = FALSE + ) + + if (length(dataset_info[[index]]$authors) > 0) { + authors_table <- get_authors_table(dataset_info[[index]]$authors) + } else { + authors_table <- NULL + } + + references_table <- get_references_table(dataset_info[[index]]$references) + + htmltools::div( + style = "padding: 1rem", + description_table, + details_table, + source_table, + authors_table, + references_table + ) + }, + + highlight = TRUE, + striped = TRUE, + pagination = FALSE, + + rowStyle = reactable::JS( + "function(rowInfo) { + if (rowInfo.level == 0) { // corresponds to row group + return { + borderLeft: '2px solid #104E8B', + fontWeight: 400 + } + } + }" + ) +) +``` + +# Method information + +::: {.callout-note} +This section displays the method information as provided in the method `config.vsh.yaml` files. + +Expand each row of the table for more details. +::: + +```{r} +#| label: method-info +method_info <- task_results$method_info + +method_summary <- purrr::map_dfr(method_info, function(.method) { + data.frame( + method = .method$name, + label = .method$label, + type = .method$type, + summary = .method$summary + ) +}) + +method_details <- purrr::map_dfr(method_info, function(.method) { + method_data <- purrr::map(.method, \(.x) {ifelse(is.null(.x), "", .x)}) + + data.frame( + description = method_data$description, + commit = method_data$commit, + version = method_data$version, + link_code = method_data$link_code, + link_documentation = method_data$link_documentation, + link_implementation = method_data$link_implementation, + link_container_image = method_data$link_container_image + ) +}) + +source_columns <- c( + "Commit" = "commit", + "Version" = "version" +) + +link_columns <- c( + "Code" = "link_code", + "Documentation" = "link_documentation", + "Implementation" = "link_implementation", + "Image" = "link_container_image" +) + +reactable::reactable( + method_summary, + columns = list( + method = reactable::colDef( + name = "Method", + style = list("font-family" = "monospace"), + html = TRUE + ), + label = reactable::colDef(name = "Label"), + type = reactable::colDef( + name = "Type", + cell = function(value) { + value |> + stringr::str_replace_all("_", " ") |> + stringr::str_to_sentence() + }, + ), + summary = reactable::colDef(name = "Summary") + ), + + details = function(index, column) { + description_table <- get_description_table( + method_details[index, c("description"), drop = FALSE] + ) + + source_table <- get_source_table(method_details[index, ], source_columns) + + links_table <- get_links_table(method_details[index, ], link_columns) + + additional_table <- get_additional_info_table( + method_info[[index]]$additional_info + ) + + if (length(method_info[[index]]$authors) > 0) { + authors_table <- get_authors_table(method_info[[index]]$authors) + } else { + authors_table <- NULL + } + + references_table <- get_references_table(method_info[[index]]$references) + + htmltools::div( + style = "padding: 1rem", + description_table, + source_table, + links_table, + additional_table, + authors_table, + references_table + ) + }, + + highlight = TRUE, + striped = TRUE, + pagination = FALSE, + + rowStyle = reactable::JS( + "function(rowInfo) { + if (rowInfo.level == 0) { // corresponds to row group + return { + borderLeft: '2px solid #104E8B', + fontWeight: 400 + } + } + }" + ) +) +``` + +# Metric information + +::: {.callout-note} +This section displays the metric 
information as provided in the metric `config.vsh.yaml` files. + +Expand each row of the table for more details. +::: + +```{r} +#| label: metric-info +metric_info <- task_results$metric_info + +metric_summary <- purrr::map_dfr(metric_info, function(.metric) { + data.frame( + metric = .metric$name, + label = .metric$label, + summary = .metric$summary + ) +}) + +metric_details <- purrr::map_dfr(metric_info, function(.metric) { + metric_data <- purrr::map(.metric, \(.x) {ifelse(is.null(.x), "", .x)}) + + data.frame( + description = metric_data$description, + component_name = metric_data$component_name, + commit = metric_data$commit, + version = metric_data$version, + maximize = metric_data$maximize, + link_implementation = metric_data$link_implementation, + link_container_image = metric_data$link_container_image + ) +}) + +source_columns <- c( + "Component" = "component_name", + "Commit" = "commit", + "Version" = "version", + "Maximize?" = "maximize" +) +link_columns <- c( + "Implementation" = "link_implementation", + "Image" = "link_container_image" +) + +reactable::reactable( + metric_summary, + columns = list( + metric = reactable::colDef( + name = "Metric", + style = list("font-family" = "monospace"), + html = TRUE + ), + label = reactable::colDef(name = "Label"), + summary = reactable::colDef(name = "Summary") + ), + + details = function(index, column) { + description_table <- get_description_table( + metric_details[index, c("description"), drop = FALSE] + ) + + source_table <- get_source_table(metric_details[index, ], source_columns) + + links_table <- get_links_table(metric_details[index, ], link_columns) + + additional_table <- get_additional_info_table( + metric_info[[index]]$additional_info + ) + + if (length(metric_info[[index]]$authors) > 0) { + authors_table <- get_authors_table(metric_info[[index]]$authors) + } else { + authors_table <- NULL + } + + references_table <- get_references_table(metric_info[[index]]$references) + + htmltools::div( + style = "padding: 1rem", + description_table, + source_table, + links_table, + additional_table, + authors_table, + references_table + ) + }, + + highlight = TRUE, + striped = TRUE, + pagination = FALSE, + + rowStyle = reactable::JS( + "function(rowInfo) { + if (rowInfo.level == 0) { // corresponds to row group + return { + borderLeft: '2px solid #104E8B', + fontWeight: 400 + } + } + }" + ) +) +``` + +# Normalization {#sec-normalization} + +::: {.callout-note} +This section displays the normalization information for each metric. +The scores for control methods are used to create a control range and scale the scores from other methods. +Points outside the control range indicate that a metric is lacking an appropriate control method. + +Click the tabs to see the plots for each metric. 
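As a rough illustration (hypothetical numbers, not taken from this run), each raw score is rescaled linearly against the range spanned by the control-method scores for the same dataset and metric:

```r
# Hypothetical example of the control-based scaling used in this section
control_min <- 0.20 # lowest control-method score for a dataset/metric pair
control_max <- 0.85 # highest control-method score for the same pair
raw_value <- 0.70   # raw score for a method being evaluated

(raw_value - control_min) / (control_max - control_min)
#> [1] 0.7692308
```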
+::: + +```{r} +#| label: normalization-controls +#| results: asis +dataset_names <- purrr::map_chr(dataset_info, "name") +method_names <- purrr::map_chr(method_info, "name") +metric_names <- purrr::map_chr(metric_info, "name") + +control_method_names <- method_summary$method[method_summary$type == "control_method"] + +n_controls <- purrr::map_dfr(task_results$results, function(.result) { + data.frame(dataset = .result$dataset_name, method = .result$method) +}) |> + dplyr::filter( + method %in% control_method_names + ) |> + dplyr::group_by(dataset) |> + dplyr::count(name = "n_controls") |> + dplyr::ungroup() |> + dplyr::mutate(dataset = factor(dataset, levels = dataset_names)) |> + tidyr::complete(dataset, fill = list(n_controls = 0)) + +has_controls <- all(n_controls$n_controls >= 2) + +if (isFALSE(has_controls)) { + out <- c("## No normalization performed") + + cat( + c( + "::: {.callout-important}", + "There are less than two control methods for some datasets.", + "**Scaling cannot be peformed and the results sections will be empty.**", + "See the quality control section for more information.", + "", + knitr::kable( + n_controls, + col.names = c("Dataset", "Number of control methods") + ), + ":::" + ), + sep = "\n" + ) +} else if (!all(n_controls$n_controls == length(control_method_names))) { + cat( + c( + "::: {.callout-warning}", + "Some control method results are missing for some datasets.", + "This may affect scaling and results.", + "See the normalization plots and the quality control section for more information.", + "", + knitr::kable( + n_controls, + col.names = c("Dataset", "Number of control methods") + ), + ":::" + ), + sep = "\n" + ) +} +``` + +```{r} +#| label: normalization +#| eval: !expr has_controls +dataset_details <- purrr::map_dfr(dataset_info, function(.dataset) { + data.frame( + dataset = .dataset$name, + dataset_label = .dataset$label + ) +}) |> + dplyr::arrange(dataset) + +method_details <- purrr::map_dfr(method_info, function(.method) { + data.frame( + method = .method$name, + method_label = .method$label, + method_type = .method$type + ) +}) |> + dplyr::arrange(method) + +metric_details <- purrr::map_dfr(metric_info, function(.metric) { + data.frame( + metric = .metric$name, + metric_label = .metric$label + ) +}) |> + dplyr::arrange(metric) + +scores <- purrr::map_dfr(task_results$results, function(.result) { + if (!.result$succeeded) { + return(NULL) + } + + data.frame( + dataset = .result$dataset_name, + method = .result$method, + metric = unlist(.result$metric_names), + value = unlist(.result$metric_values) + ) +}) + +control_ranges <- scores |> + dplyr::left_join(method_details, by = "method") |> + dplyr::filter(method_type == "control_method") |> + dplyr::group_by(dataset, metric) |> + dplyr::summarise( + control_min = min(value, na.rm = TRUE), + control_max = max(value, na.rm = TRUE), + .groups = "drop" + ) + +scaled_scores <- scores |> + dplyr::left_join(control_ranges, by = c("dataset", "metric")) |> + dplyr::mutate( + scaled_value = (value - control_min) / (control_max - control_min) + ) + +complete_scores <- tidyr::expand_grid( + dataset = dataset_names, + method = method_names, + metric = metric_names +) |> + dplyr::left_join(dataset_details, by = "dataset") |> + dplyr::relocate(method, metric, .after = dplyr::last_col()) |> + dplyr::left_join(method_details, by = "method") |> + dplyr::relocate(metric, .after = dplyr::last_col()) |> + dplyr::left_join(metric_details, by = "metric") |> + dplyr::left_join(scaled_scores, by = c("dataset", "method", 
"metric")) |> + tidyr::replace_na(list(scaled_value = 0)) |> + dplyr::arrange(dataset, method, metric) +``` + +::: {.panel-tabset} + +```{r} +#| label: normalization-plots +#| results: hide +#| eval: !expr has_controls +fig_height <- 0.8 * length(metric_names) + 1 + +src_list <- purrr::map(metric_names, function(.metric) { + metric_label <- metric_details$metric_label[metric_details$metric == .metric] + + src <- c( + "## <> {.unnumbered .unlisted}", + "", + "```{r normalization-plot-<<.metric>>, fig.height=<>}", + "plot_scaling(complete_scores, '<<.metric>>', method_details, metric_details)", + "```", + "" + ) + knitr::knit_expand(text = src, delim = c("<<", ">>")) +}) + +out <- knitr::knit_child(text = unlist(src_list), options = list(cache = FALSE)) +``` + +`r out` + +::: + +# Quality control {#sec-quality-control} + +::: {.callout-note} +This section displays quality control information about the task run. + +Click on the tabs to see each category of quality control checks and expand the rows to see more information. +::: + +```{r} +#| label: quality-control +quality_control <- task_results$quality_control |> + purrr::map_dfr(as.data.frame) |> + dplyr::mutate(check = ifelse(severity > 0, "failed", "passed")) + +qc_summary <- quality_control |> + dplyr::group_by(category, check) |> + dplyr::count(name = "amount") |> + dplyr::ungroup() |> + tidyr::spread(check, amount, fill = 0) |> + dplyr::mutate( + category = factor( + category, + levels = c( + "Task info", + "Dataset info", + "Method info", + "Metric info", + "Raw results", + "Scaling" + ) + ) + ) |> + tidyr::complete(category, fill = list(passed = 0, failed = 0)) + +reactable::reactable( + qc_summary, + columns = list( + category = reactable::colDef(name = "Category"), + passed = reactable::colDef(name = "Passed checks"), + failed = reactable::colDef(name = "Failed checks") + ), + sortable = FALSE, + striped = TRUE +) +``` + +::: {.panel-tabset} + +## Task information {.unnumbered .unlisted} + +```{r} +#| label: quality-control-task +quality_control |> + dplyr::filter(category == "Task info", check == "failed") |> + dplyr::select(-category, -check) |> + get_qc_table() +``` + +## Dataset information {.unnumbered .unlisted} + +```{r} +#| label: quality-control-datasets +quality_control |> + dplyr::filter(category == "Dataset info", check == "failed") |> + dplyr::select(-category, -check) |> + get_qc_table() +``` + +## Method information {.unnumbered .unlisted} + +```{r} +#| label: quality-control-methods +quality_control |> + dplyr::filter(category == "Method info", check == "failed") |> + dplyr::select(-category, -check) |> + get_qc_table() +``` + +## Metric information {.unnumbered .unlisted} + +```{r} +#| label: quality-control-metrics +quality_control |> + dplyr::filter(category == "Metric info", check == "failed") |> + dplyr::select(-category, -check) |> + get_qc_table() +``` + +## Raw results {.unnumbered .unlisted} + +```{r} +#| label: quality-control-results +quality_control |> + dplyr::filter(category == "Raw results", check == "failed") |> + dplyr::select(-category, -check) |> + get_qc_table() +``` + +## Scaling {.unnumbered .unlisted} + +```{r} +#| label: quality-control-scaling +quality_control |> + dplyr::filter(category == "Scaling", check == "failed") |> + dplyr::select(-category, -check) |> + get_qc_table() +``` + +::: + +# Results + +```{r} +#| label: results +#| eval: !expr has_controls +mean_scores <- complete_scores |> + dplyr::group_by(dataset, method) |> + dplyr::summarise( + mean_score = 
aggregate_scores(scaled_value), + .groups = "drop" + ) + +dataset_scores <- complete_scores |> + dplyr::select(dataset, method, metric, scaled_value) |> + tidyr::pivot_wider( + names_from = metric, + values_from = scaled_value + ) |> + dplyr::left_join(mean_scores, by = c("dataset", "method")) + +overall_scores <- dataset_scores |> + dplyr::group_by(method) |> + dplyr::summarise( + dataset = "overall", + dplyr::across( + tidyselect::where(is.numeric), + aggregate_scores + ), + .groups = "drop" + ) + +exit_names <- c( + "Memory limit exceeded", + "Time limit exceeded", + "Execution error", + "Unknown error", + "Not applicable", + "No error" +) + +exit_codes <- purrr::map_dfr(task_results$results, function(.result) { + data.frame( + dataset = .result$dataset_name, + method = .result$method + ) +}) |> + dplyr::mutate( + exit_codes = purrr::map(task_results$results, "run_exit_code") + ) |> + dplyr::mutate( + exit_codes = purrr::map(exit_codes, \(.codes) { + if (length(.codes) == 0) { + 0 + } else { + .codes + } + }) + ) |> + dplyr::group_by(method) |> + dplyr::summarise( + exit_codes = list(unlist(exit_codes)), + .groups = "drop" + ) |> + dplyr::mutate( + all_codes = purrr::map_chr(exit_codes, function(.codes) { + paste(.codes, collapse = ", ") + }), + pct_oom = purrr::map_dbl(exit_codes, function(.codes) { + mean(.codes == 137) + }), + pct_timeout = purrr::map_dbl(exit_codes, function(.codes) { + mean(.codes == 143) + }), + pct_error = purrr::map_dbl(exit_codes, function(.codes) { + mean(.codes > 0 & .codes != 137 & .codes != 143 & .codes != 99) + }), + pct_unknown = purrr::map_dbl(exit_codes, function(.codes) { + mean(.codes < 0) + }), + pct_na = purrr::map_dbl(exit_codes, function(.codes) { + mean(.codes == 99) + }), + pct_ok = purrr::map_dbl(exit_codes, function(.codes) { + mean(.codes == 0) + }), + ) |> + tidyr::nest(exit_summary = tidyselect::starts_with("pct")) |> + dplyr::mutate(exit_summary = purrr::map(exit_summary, \(.summary) { + exit_vec <- unlist(as.vector(.summary)) + names(exit_vec) <- exit_names + + exit_vec + })) + +resources <- purrr::map_dfr(task_results$results, function(.result) { + data.frame( + dataset = .result$dataset_name, + method = .result$method + ) +}) |> + dplyr::mutate( + run_duration_secs = purrr::map(task_results$results, "run_duration_secs"), + run_cpu_pct = purrr::map(task_results$results, "run_cpu_pct"), + run_peak_memory_mb = purrr::map(task_results$results, "run_peak_memory_mb"), + run_disk_read_mb = purrr::map(task_results$results, "run_disk_read_mb"), + run_disk_write_mb = purrr::map(task_results$results, "run_disk_write_mb") + ) |> + # Summarise per task + dplyr::mutate( + run_cpu_pct = purrr::map_dbl(run_cpu_pct, function(.values) { + if (length(.values) == 0) { + return(NA_real_) + } + + mean(unlist(.values), na.rm = TRUE) + }), + run_peak_memory_mb = purrr::map_dbl(run_peak_memory_mb, function(.values) { + if (length(.values) == 0) { + return(NA_real_) + } + + max(unlist(.values), na.rm = TRUE) + }), + run_disk_read_mb = purrr::map_dbl(run_disk_read_mb, function(.values) { + if (length(.values) == 0) { + return(NA_real_) + } + + sum(unlist(.values), na.rm = TRUE) + }), + run_disk_write_mb = purrr::map_dbl(run_disk_write_mb, function(.values) { + if (length(.values) == 0) { + return(NA_real_) + } + + sum(unlist(.values), na.rm = TRUE) + }), + run_duration_secs = purrr::map_dbl(run_duration_secs, function(.values) { + if (length(.values) == 0) { + return(NA_real_) + } + + sum(unlist(.values), na.rm = TRUE) + }) + ) |> + # Summarise by method + 
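  # (the -log10 transforms added after this summary flip the direction so that
  # lower resource usage ends up as a larger value after rescaling to [0, 1])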
dplyr::group_by(method) |> + dplyr::summarise( + mean_cpu_pct = mean(run_cpu_pct, na.rm = TRUE), + mean_peak_memory_mb = mean(run_peak_memory_mb, na.rm = TRUE), + mean_disk_read_mb = mean(run_disk_read_mb, na.rm = TRUE), + mean_disk_write_mb = mean(run_disk_write_mb, na.rm = TRUE), + mean_duration_secs = mean(run_duration_secs, na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::mutate( + mean_peak_memory_mb_log = -log10(mean_peak_memory_mb), + mean_peak_memory_label = paste0(" ", label_memory(mean_peak_memory_mb), " "), + mean_disk_read_mb_log = -log10(mean_disk_read_mb), + mean_disk_read_label = paste0(" ", label_memory(mean_disk_read_mb), " "), + mean_disk_write_mb_log = -log10(mean_disk_write_mb), + mean_disk_write_label = paste0(" ", label_memory(mean_disk_write_mb), " "), + mean_duration_secs_log = -log10(mean_duration_secs), + mean_duration_label = paste0(" ", label_time(mean_duration_secs), " ") + ) +``` + +## Summary figure + +::: {.callout-note} +This is a static version of the main summary figure shown on the Open Problems website. + +Click on the image to expand it. +::: + +```{r} +#| label: results-figure +#| message: false +#| fig-width: 18 +#| fig-height: 16 +#| eval: !expr has_controls +figure_data <- overall_scores |> + dplyr::select(-dataset) |> + dplyr::relocate(mean_score, .after = method) |> + dplyr::left_join( + dplyr::select(exit_codes, method, exit_summary), + by = "method" + ) |> + dplyr::relocate(exit_summary, .after = mean_score) |> + # Fill in missing exit summaries for methods that were skipped + dplyr::mutate( + exit_summary = purrr::map(exit_summary, \(.summary) { + if (!is.null(.summary)) { + return(.summary) + } else { + rep(0, length(exit_names)) |> + setNames(exit_names) + } + }) + ) |> + dplyr::left_join( + mean_scores |> + dplyr::arrange(dataset) |> + tidyr::pivot_wider(names_from = "dataset", values_from = "mean_score"), + by = "method" + ) |> + dplyr::relocate( + tidyselect::all_of(dataset_details$dataset), + .after = exit_summary + ) |> + dplyr::left_join( + resources |> + dplyr::select( + method, + mean_cpu_pct, + mean_peak_memory_mb_log, + mean_peak_memory_label, + mean_disk_read_mb_log, + mean_disk_read_label, + mean_disk_write_mb_log, + mean_disk_write_label, + mean_duration_secs_log, + mean_duration_label + ), + by = "method" + ) |> + # Resources are not 0-1 so need to be rescaled + dplyr::mutate( + mean_cpu_pct = scales::rescale(mean_cpu_pct), + mean_peak_memory_mb_log = scales::rescale(mean_peak_memory_mb_log), + mean_disk_read_mb_log = scales::rescale(mean_disk_read_mb_log), + mean_disk_write_mb_log = scales::rescale(mean_disk_write_mb_log), + mean_duration_secs_log = scales::rescale(mean_duration_secs_log) + ) |> + dplyr::arrange(dplyr::desc(mean_score)) |> + dplyr::mutate( + method = factor( + method, + levels = method_details$method, + labels = method_details$method_label + ) + ) |> + dplyr::rename(id = method) + +column_info <- tibble::tibble( + id = colnames(figure_data), + name = c( + "Method", + "Overall score", + "Error reason", + dataset_details$dataset_label, + metric_details$metric_label, + "% CPU", + "Peak memory", + "", + "Disk read", + "", + "Disk write", + "", + "Duration", + "" + ), + geom = c( + "text", + "bar", + "pie", + rep("funkyrect", length(dataset_names)), + rep("funkyrect", length(metric_names)), + c("funkyrect", rep(c("rect", "text"), 4)) + ), + group = c( + NA, + "overall", + "overall", + rep("datasets", length(dataset_names)), + rep("metrics", length(metric_names)), + rep("resources", 9) + ), + palette = c( + NA, + 
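    # one palette entry per figure_data column, in the same order as `name`
    # and `geom` above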
"overall_palette", + "error_reason_palette", + rep("datasets_palette", length(dataset_names)), + rep("metrics_palette", length(metric_names)), + "resources_palette", rep(c("resources_palette", "black"), 4) + ), + width = c( + 12, + 4, + 1, + rep(1, length(dataset_names)), + rep(1, length(metric_names)), + rep(1, 9) + ), + overlay = c( + FALSE, + FALSE, + FALSE, + rep(FALSE, length(dataset_names)), + rep(FALSE, length(metric_names)), + FALSE, rep(c(FALSE, TRUE), 4) + ), + hjust = c( + 0, + 0, + 0.5, + rep(0.5, length(dataset_names)), + rep(0.5, length(metric_names)), + rep(0.5, 9) + ) +) + +column_groups <- tibble::tibble( + group = c("overall", "datasets", "metrics", "resources"), + category = c("Overall", "Datasets", "Metrics", "Resources"), + palette = c("overall_palette", "datasets_palette", "metrics_palette", "resources_palette"), +) + +palettes <- list( + overall_palette = "Greys", + error_reason_palette = c( + "#8DD3C7", + "#FFFFB3", + "#BEBADA", + "#fdb462", + "#999999", + "#FFFFFF" + ), + datasets_palette = "Blues", + metrics_palette = "Reds", + resources_palette = "YlOrBr", + black = c("black", "black") +) +names(palettes$error_reason_palette) <- exit_names + +legends <- list( + list( + geom = "funkyrect", + title = "Score", + colour = "white" + ), + list( + palette = "overall_palette", + enabled = FALSE + ), + list( + palette = "error_reason_palette", + geom = "pie", + title = "", + label_width = 5 + ), + list( + palette = "datasets_palette", + enabled = FALSE + ), + list( + palette = "metrics_palette", + enabled = FALSE + ), + list( + palette = "resources_palette", + enabled = FALSE + ) +) + +funkyheatmap::funky_heatmap( + figure_data, + column_info = column_info, + column_groups = column_groups, + palettes = palettes, + legends = legends, + scale_column = FALSE, + position_args = funkyheatmap::position_arguments( + col_space = 0.2, + col_bigspace = 0.8, + col_annot_offset = 6 + ) +) +``` + +## Table + +::: {.callout-note} +This table displays the scaled metric scores. +The "Overall" dataset gives the mean score across all of the actual datasets. + +Sort and filter the table to check scores you are interested in. 
+::: + +```{r} +#| label: results-table +#| eval: !expr has_controls +table_data <- dataset_scores |> + dplyr::bind_rows(overall_scores) |> + dplyr::mutate( + dataset = factor( + dataset, + levels = c("overall", dataset_details$dataset), + labels = c("Overall", dataset_details$dataset_label) + ), + method = factor( + method, + levels = method_details$method, + labels = method_details$method_label + ) + ) |> + dplyr::relocate(dataset, .after = method) |> + dplyr::relocate(mean_score, .after = dataset) |> + dplyr::arrange(dataset, method) + +reactable::reactable( + table_data, + + columns = c( + list( + method = reactable::colDef( + name = "Method", + sticky = "left" + ), + dataset = reactable::colDef( + name = "Dataset", + sticky = "left", + style = list(borderRight = "2px solid #999"), + headerStyle = list(borderRight = "2px solid #999") + ), + mean_score = reactable::colDef( + name = "Mean score", + format = reactable::colFormat(digits = 3) + ) + ), + purrr::map( metric_details$metric_label, + function(.metric_label) { + reactable::colDef( + name = .metric_label, + format = reactable::colFormat(digits = 3) + ) + } + ) |> + purrr::set_names(metric_details$metric) + ), + + highlight = TRUE, + striped = TRUE, + defaultPageSize = 25, + showPageSizeOptions = TRUE, + filterable = TRUE, + searchable = TRUE +) +``` diff --git a/src/reporting/render_report/script.R b/src/reporting/render_report/script.R new file mode 100644 index 000000000..286e20f37 --- /dev/null +++ b/src/reporting/render_report/script.R @@ -0,0 +1,70 @@ +## VIASH START +processed_dir <- "resources_test/openproblems/task_results_v4/processed" + +par <- list( + # Inputs + input_task_results = paste0(processed_dir, "/task_info.json"), + # Outputs + output = "report.html" +) +## VIASH END + +################################################################################ +# MAIN SCRIPT +################################################################################ + +cat("====== Render report ======\n") + +cat("\n>>> Copying input file to temporary directory...\n") +tmp_dir <- file.path(tempdir(), "render-report") +dir.create(tmp_dir, recursive = TRUE) +cat("Temporary directory: ", tmp_dir, "\n", sep = "") +file.copy( + par$input_task_results, + file.path(tmp_dir, "task_results.json"), + overwrite = TRUE +) + +cat("\n>>> Copying resources to temporary directory...\n") +cat("Copying 'report.qmd'...\n") +file.copy( + file.path(meta$resources_dir, "report.qmd"), + tmp_dir, + overwrite = TRUE +) +cat("Copying 'logo.svg'...\n") +file.copy( + file.path(meta$resources_dir, "logo.svg"), + tmp_dir, + overwrite = TRUE +) +cat("Copying 'functions.R'...\n") +file.copy( + file.path(meta$resources_dir, "functions.R"), + tmp_dir, + overwrite = TRUE +) + +cat("\n>>> Rendering report...\n") +cat("Quarto version: ", as.character(quarto::quarto_version()), sep = "") +xfun::in_dir( + tmp_dir, + quarto::quarto_render( + input = "report.qmd", + output_file = "report.html", + execute_params = list( + task_results_json = "task_results.json", + logo = "logo.svg", + functions = "functions.R" + ) + ) +) + +cat("\n>>> Copying output file...\n") +file.copy( + file.path(tmp_dir, "report.html"), + par$output, + overwrite = TRUE +) + +cat("\n>>> Done!\n") diff --git a/src/reporting/shared/bibliography.bib b/src/reporting/shared/bibliography.bib new file mode 100644 index 000000000..639c74476 --- /dev/null +++ b/src/reporting/shared/bibliography.bib @@ -0,0 +1,2058 @@ +@misc{10x2018pbmc, + title = {1k PBMCs from a Healthy Donor (v3 chemistry)}, + author = {{10x 
Genomics}}, + year = {2018}, + url = {https://www.10xgenomics.com/resources/datasets/1-k-pbm-cs-from-a-healthy-donor-v-3-chemistry-3-standard-3-0-0} +} + +@misc{10x2019heart, + title = {Human Heart}, + author = {{10x Genomics}}, + year = {2019}, + url = {https://www.10xgenomics.com/datasets/human-heart-1-standard-1-0-0} +} + +@misc{10x2019lymph, + title = {Human Lymph Node}, + author = {{10x Genomics}}, + year = {2019}, + url = {https://www.10xgenomics.com/datasets/human-lymph-node-1-standard-1-0-0} +} + +@misc{10x2019pbmc, + title = {5k Peripheral Blood Mononuclear Cells (PBMCs) from a Healthy Donor with a Panel of TotalSeq-B Antibodies (v3 chemistry)}, + author = {{10x Genomics}}, + year = {2019}, + url = {https://www.10xgenomics.com/resources/datasets/5-k-peripheral-blood-mononuclear-cells-pbm-cs-from-a-healthy-donor-with-cell-surface-proteins-v-3-chemistry-3-1-standard-3-1-0} +} + +@misc{10x2020breast, + title = {Human Breast Cancer: Whole Transcriptome Analysis}, + author = {{10x Genomics}}, + year = {2020}, + url = {https://www.10xgenomics.com/datasets/human-breast-cancer-whole-transcriptome-analysis-1-standard-1-2-0} +} + +@misc{10x2020cerebellum, + title = {Human Cerebellum: Whole Transcriptome Analysis}, + author = {{10x Genomics}}, + year = {2020}, + url = {https://www.10xgenomics.com/datasets/human-cerebellum-whole-transcriptome-analysis-1-standard-1-2-0} +} + +@misc{10x2020kidney, + title = {Mouse Kidney Section (Coronal)}, + author = {{10x Genomics}}, + year = {2020}, + url = {https://www.10xgenomics.com/datasets/mouse-kidney-section-coronal-1-standard-1-1-0} +} + +@misc{10x2021breast, + title = {Human Breast Cancer: Ductal Carcinoma In Situ, Invasive Carcinoma (FFPE)}, + author = {{10x Genomics}}, + year = {2021}, + url = {https://www.10xgenomics.com/datasets/human-breast-cancer-ductal-carcinoma-in-situ-invasive-carcinoma-ffpe-1-standard-1-3-0} +} + +@misc{10x2021prostate, + title = {Normal Human Prostate (FFPE)}, + author = {{10x Genomics}}, + year = {2021}, + url = {https://www.10xgenomics.com/datasets/normal-human-prostate-ffpe-1-standard-1-3-0} +} + +@misc{10x2022brain, + title = {Mouse Brain Coronal Section 1 (FFPE)}, + author = {{10x Genomics}}, + year = {2022}, + url = {https://www.10xgenomics.com/datasets/mouse-brain-coronal-section-1-ffpe-2-standard} +} + +@misc{10x2022cervical, + title = {Human Cervical Cancer (FFPE)}, + author = {{10x Genomics}}, + year = {2022}, + url = {https://www.10xgenomics.com/datasets/human-cervical-cancer-1-standard} +} + +@misc{10x2022olfactory, + title = {Adult Mouse Olfactory Bulb}, + author = {{10x Genomics}}, + year = {2022}, + url = {https://www.10xgenomics.com/datasets/adult-mouse-olfactory-bulb-1-standard-1} +} + +@misc{10x2022intestine, + title = {Human Intestine Cancer (FPPE)}, + author = {{10x Genomics}}, + year = {2022}, + url = {https://www.10xgenomics.com/datasets/human-intestine-cancer-1-standard} +} + +@misc{10x2022melanoma, + title = {Human Melanoma, IF Stained (FFPE)}, + author = {{10x Genomics}}, + year = {2022}, + url = {https://www.10xgenomics.com/datasets/human-melanoma-if-stained-ffpe-2-standard} +} + +@misc{10x2022prostate, + title = {Human Prostate Cancer, Adjacent Normal Section with IF Staining (FFPE)}, + author = {{10x Genomics}}, + year = {2022}, + url = {https://www.10xgenomics.com/datasets/human-prostate-cancer-adjacent-normal-section-with-if-staining-ffpe-1-standard} +} + +@misc{10x2023brain, + title = {Human Brain Cancer, 11 mm Capture Area (FFPE)}, + author = {{10x Genomics}}, + year = {2023}, + url = 
{https://www.10xgenomics.com/datasets/human-brain-cancer-11-mm-capture-area-ffpe-2-standard} +} + +@misc{10x2023colon, + title = {Visium CytAssist Gene Expression Libraries of Post-Xenium Human Colon Cancer (FFPE)}, + author = {{10x Genomics}}, + year = {2023}, + url = {https://www.10xgenomics.com/datasets/visium-cytassist-gene-expression-libraries-of-post-xenium-human-colon-cancer-ffpe-using-the-human-whole-transcriptome-probe-set-2-standard} +} + +@misc{10x2023colorectal, + title = {Human Colorectal Cancer, 11 mm Capture Area (FFPE)}, + author = {{10x Genomics}}, + year = {2023}, + url = {https://www.10xgenomics.com/datasets/human-colorectal-cancer-11-mm-capture-area-ffpe-2-standard} +} + +@misc{10x2023embryo, + title = {Visium CytAssist, Mouse Embryo, 11 mm Capture Area (FFPE)}, + author = {{10x Genomics}}, + year = {2023}, + url = {https://www.10xgenomics.com/datasets/visium-cytassist-mouse-embryo-11-mm-capture-area-ffpe-2-standard} +} + +@misc{10x2023kidney, + title = {Human Kidney, 11 mm Capture Area (FFPE)}, + author = {{10x Genomics}}, + year = {2023}, + url = {https://www.10xgenomics.com/datasets/human-kidney-11-mm-capture-area-ffpe-2-standard} +} + +@misc{10x2023lung, + title = {Human Lung Cancer, 11 mm Capture Area (FFPE)}, + author = {{10x Genomics}}, + year = {2023}, + url = {https://www.10xgenomics.com/datasets/human-lung-cancer-11-mm-capture-area-ffpe-2-standard} +} + +@misc{10x2023mousebrain, + title = {Visium CytAssist Gene Expression Libraries of Post-Xenium Mouse Brain (FF)}, + author = {{10x Genomics}}, + year = {2023}, + url = {https://www.10xgenomics.com/datasets/visium-cytassist-gene-expression-libraries-of-post-xenium-mouse-brain-ff-using-the-mouse-whole-transcriptome-probe-set-2-standard} +} + +@article{agostinis2022newwave, + doi = {10.1093/bioinformatics/btac149}, + url = {https://doi.org/10.1093/bioinformatics/btac149}, + year = {2022}, + month = {Mar.}, + publisher = {Oxford University Press ({OUP})}, + volume = {38}, + number = {9}, + pages = {2648--2650}, + author = {Federico Agostinis and Chiara Romualdi and Gabriele Sales and Davide Risso}, + editor = {Yann Ponty}, + title = {NewWave: a scalable R/Bioconductor package for the dimensionality reduction and batch effect removal of single-cell {RNA}-seq data}, + journal = {Bioinformatics} +} + +@article{agrawal2021mde, + title = {Minimum-Distortion Embedding}, + author = {Akshay Agrawal and Alnur Ali and Stephen Boyd}, + year = {2021}, + journal = {Foundations and Trends{\textregistered} in Machine Learning}, + publisher = {Now Publishers}, + volume = {14}, + number = {3}, + pages = {211--378}, + doi = {10.1561/2200000090}, + url = {https://doi.org/10.1561/2200000090} +} + +@article{aliee2021autogenes, + title = {{AutoGeneS}: Automatic gene selection using multi-objective optimization for {RNA}-seq deconvolution}, + author = {Hananeh Aliee and Fabian J. 
Theis}, + year = {2021}, + month = {Jul.}, + journal = {Cell Systems}, + publisher = {Elsevier {BV}}, + volume = {12}, + number = {7}, + pages = {706--715.e4}, + doi = {10.1016/j.cels.2021.05.006}, + url = {https://doi.org/10.1016/j.cels.2021.05.006} +} + +@inproceedings{amelio2015normalized, + doi = {10.1145/2808797.2809344}, + url = {https://doi.org/10.1145/2808797.2809344}, + year = {2015}, + month = {Aug.}, + publisher = {{ACM}}, + author = {Alessia Amelio and Clara Pizzuti}, + title = {Is Normalized Mutual Information a Fair Measure for Comparing Community Detection Methods?}, + booktitle = {Proceedings of the 2015 {IEEE}/{ACM} International Conference on Advances in Social Networks Analysis and Mining 2015} +} + +@article{andersson2020single, + title = {Single-cell and spatial transcriptomics enables probabilistic inference of cell type topography}, + author = {Alma Andersson and Joseph Bergenstr{\aa}hle and Michaela Asp and Ludvig Bergenstr{\aa}hle and Aleksandra Jurek and Jos{\'{e}} Fern{\'{a}}ndez Navarro and Joakim Lundeberg}, + year = {2020}, + month = {Oct.}, + journal = {Communications Biology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {3}, + number = {1}, + doi = {10.1038/s42003-020-01247-y}, + url = {https://doi.org/10.1038/s42003-020-01247-y} +} + +@article{andersson2021sepal, + title = {sepal: Identifying transcript profiles with spatial patterns by diffusion-based modeling}, + author = {Andersson, Alma and Lundeberg, Joakim}, + journal = {Bioinformatics}, + volume = {37}, + number = {17}, + pages = {2644--2650}, + year = {2021}, + publisher = {Oxford University Press}, + doi = {10.1093/bioinformatics/btab164} +} + +@article{batson2019molecular, + title = {Molecular Cross-Validation for Single-Cell RNA-seq}, + author = {Batson, Joshua and Royer, Lo{\"\i}c and Webber, James}, + year = {2019}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/786269}, + url = {https://www.biorxiv.org/content/early/2019/09/30/786269}, + elocation-id = {786269}, + eprint = {https://www.biorxiv.org/content/early/2019/09/30/786269.full.pdf} +} + +@article{biancalani2021deep, + title = {Deep learning and alignment of spatially resolved single-cell transcriptomes with Tangram}, + author = {Tommaso Biancalani and Gabriele Scalia and Lorenzo Buffoni and Raghav Avasthi and Ziqing Lu and Aman Sanger and Neriman Tokcan and Charles R. Vanderburg and {\AA}sa Segerstolpe and Meng Zhang and Inbal Avraham-Davidi and Sanja Vickovic and Mor Nitzan and Sai Ma and Ayshwarya Subramanian and Michal Lipinski and Jason Buenrostro and Nik Bear Brown and Duccio Fanelli and Xiaowei Zhuang and Evan Z. 
Macosko and Aviv Regev}, + year = {2021}, + month = {Oct.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {18}, + number = {11}, + pages = {1352--1362}, + doi = {10.1038/s41592-021-01264-7}, + url = {https://doi.org/10.1038/s41592-021-01264-7} +} + +@article{bintayyash2021non, + author = {BinTayyash, Nuha and Georgaka, Sokratia and John, S T and Ahmed, Sumon and Boukouvalas, Alexis and Hensman, James and Rattray, Magnus}, + title = {{Non-parametric modelling of temporal and spatial counts data from RNA-seq experiments}}, + journal = {Bioinformatics}, + volume = {37}, + number = {21}, + pages = {3788-3795}, + year = {2021}, + month = {07}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btab486}, + url = {https://doi.org/10.1093/bioinformatics/btab486}, + eprint = {https://academic.oup.com/bioinformatics/article-pdf/37/21/3788/50336570/btab486.pdf} +} + +@article{bland2000odds, + title = {Statistics Notes: The odds ratio}, + author = {J. M. Bland}, + year = {2000}, + month = {May}, + journal = {{BMJ}}, + publisher = {{BMJ}}, + volume = {320}, + number = {7247}, + pages = {1468--1468}, + doi = {10.1136/bmj.320.7247.1468}, + url = {https://doi.org/10.1136/bmj.320.7247.1468} +} + +@article{breiman2001random, + title = {{Random forests}}, + author = {Breiman, Leo}, + journal = {Machine learning}, + publisher = {Springer Science and Business Media LLC}, + volume = 45, + number = 1, + pages = {5--32}, + month = oct, + year = 2001, + doi = {10.1023/a:1010933404324}, + issn = {0885-6125,1573-0565}, + language = {en} +} + +@article{bttner2018test, + title = {A test metric for assessing single-cell {RNA}-seq batch correction}, + author = {Maren B\"{u}ttner and Zhichao Miao and F. Alexander Wolf and Sarah A. Teichmann and Fabian J. Theis}, + year = {2018}, + month = {Dec.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {1}, + pages = {43--49}, + doi = {10.1038/s41592-018-0254-1}, + url = {https://doi.org/10.1038/s41592-018-0254-1} +} + +@article{cabello2020singlecellsignalr, + title = {{SingleCellSignalR}: inference of intercellular networks from single-cell transcriptomics}, + author = {Simon Cabello-Aguilar and M{\'{e}}lissa Alame and Fabien Kon-Sun-Tack and Caroline Fau and Matthieu Lacroix and Jacques Colinge}, + year = {2020}, + month = {Mar.}, + journal = {Nucleic Acids Research}, + publisher = {Oxford University Press ({OUP})}, + volume = {48}, + number = {10}, + pages = {e55--e55}, + doi = {10.1093/nar/gkaa183}, + url = {https://doi.org/10.1093/nar/gkaa183} +} + +@article{cable2021robust, + title = {Robust decomposition of cell type mixtures in spatial transcriptomics}, + author = {Dylan M. Cable and Evan Murray and Luli S. Zou and Aleksandrina Goeva and Evan Z. Macosko and Fei Chen and Rafael A. 
Irizarry}, + year = {2021}, + month = {Feb.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {4}, + pages = {517--526}, + doi = {10.1038/s41587-021-00830-w}, + url = {https://doi.org/10.1038/s41587-021-00830-w} +} + +@misc{cannoodt2021viashfromscripts, + doi = {10.48550/ARXIV.2110.11494}, + url = {https://arxiv.org/abs/2110.11494}, + author = {Cannoodt, Robrecht and Cannoodt, Hendrik and Van de Kerckhove, Eric and Boschmans, Andy and De Maeyer, Dries and Verbeiren, Toni}, + keywords = {Software Engineering (cs.SE), FOS: Computer and information sciences, FOS: Computer and information sciences}, + title = {Viash: from scripts to pipelines}, + publisher = {arXiv}, + year = {2021}, + copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International} +} + +@article{cai2023spanve, + title = {Spanve: an Statistical Method to Detect Clustering-friendly Spatially Variable Genes in Large-scale Spatial Transcriptomics Data}, + author = {Cai, Guoxin and Chen, Yichang and Chen, Shuqing and Gu, Xun and Zhou, Zhan}, + journal = {bioRxiv}, + pages = {2023--02}, + year = {2023}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2023.02.08.527623} +} + +@article{cao2018joint, + title = {Joint profiling of chromatin accessibility and gene expression in thousands of single cells}, + author = {Junyue Cao and Darren A. Cusanovich and Vijay Ramani and Delasa Aghamirzaie and Hannah A. Pliner and Andrew J. Hill and Riza M. Daza and Jose L. McFaline-Figueroa and Jonathan S. Packer and Lena Christiansen and Frank J. Steemers and Andrew C. Adey and Cole Trapnell and Jay Shendure}, + year = {2018}, + month = {Sep}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {361}, + number = {6409}, + pages = {1380--1385}, + doi = {10.1126/science.aau0730}, + url = {https://doi.org/10.1126/science.aau0730} +} + +@article{cao2020human, + title = {A human cell atlas of fetal gene expression}, + author = {Junyue Cao and Diana R. O'Day and Hannah A. Pliner and Paul D. Kingsley and Mei Deng and Riza M. Daza and Michael A. Zager and Kimberly A. Aldinger and Ronnie Blecher-Gonen and Fan Zhang and Malte Spielmann and James Palis and Dan Doherty and Frank J. Steemers and Ian A. 
Glass and Cole Trapnell and Jay Shendure}, + year = {2020}, + month = {Nov.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {370}, + number = {6518}, + doi = {10.1126/science.aba7721}, + url = {https://doi.org/10.1126/science.aba7721} +} + +@article{chai2014root, + title = {{Root mean square error (RMSE) or mean absolute error (MAE)?}}, + author = {Chai, T and Draxler, R R}, + journal = {Geoscientific model development discussions}, + publisher = {Copernicus GmbH}, + volume = 7, + number = 1, + pages = {1525--1534}, + month = feb, + year = 2014, + doi = {10.5194/gmdd-7-1525-2014}, + issn = {1991-962X}, + language = {en} +} + +@article{chang2022spatial, + title = {Spatial omics representation and functional tissue module inference using graph Fourier transform}, + author = {Chang, Yuzhou and Liu, Jixin and Ma, Anjun and Jiang, Sizun and Krull, Jordan and Yeo, Yao Yu and Liu, Yang and Rodig, Scott J and Barouch, Dan H and Fan, Rong and others}, + journal = {bioRxiv}, + pages = {2022--12}, + year = {2022}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2022.12.10.519929} +} + +@article{chazarragil2021flexible, + doi = {10.1093/nar/gkab004}, + url = {https://doi.org/10.1093/nar/gkab004}, + year = {2021}, + month = {Feb.}, + publisher = {Oxford University Press ({OUP})}, + volume = {49}, + number = {7}, + pages = {e42--e42}, + author = {Ruben Chazarra-Gil and Stijn van~Dongen and Vladimir~Yu Kiselev and Martin Hemberg}, + title = {Flexible comparison of batch correction methods for single-cell {RNA}-seq using {BatchBench}}, + journal = {Nucleic Acids Research} +} + +@article{chen2009local, + title = {Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Drawing, and Proximity Analysis}, + author = {Lisha Chen and Andreas Buja}, + year = {2009}, + month = {Mar.}, + journal = {Journal of the American Statistical Association}, + publisher = {Informa {UK} Limited}, + volume = {104}, + number = {485}, + pages = {209--219}, + doi = {10.1198/jasa.2009.0111}, + url = {https://doi.org/10.1198/jasa.2009.0111} +} + +@inproceedings{chen2016xgboost, + title = {{XGBoost}}, + author = {Tianqi Chen and Carlos Guestrin}, + year = {2016}, + month = {Aug.}, + booktitle = {Proceedings of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining}, + publisher = {{Acm}}, + doi = {10.1145/2939672.2939785}, + url = {https://doi.org/10.1145/2939672.2939785} +} + +@article{cichocki2009fast, + title = {Fast Local Algorithms for Large Scale Nonnegative Matrix and Tensor Factorizations}, + author = {Andrzej Cichocki and Anh-Huy Phan}, + year = {2009}, + journal = {{IEICE} Transactions on Fundamentals of Electronics, Communications and Computer Sciences}, + publisher = {Institute of Electronics, Information and Communications Engineers ({IEICE})}, + volume = {E92-a}, + number = {3}, + pages = {708--721}, + doi = {10.1587/transfun.e92.a.708}, + url = {https://doi.org/10.1587/transfun.e92.a.708} +} + +@article{coifman2006diffusion, + title = {Diffusion maps}, + author = {Ronald R. Coifman and St{\'{e}}phane Lafon}, + year = {2006}, + month = {Jul.}, + journal = {Applied and Computational Harmonic Analysis}, + publisher = {Elsevier {BV}}, + volume = {21}, + number = {1}, + pages = {5--30}, + doi = {10.1016/j.acha.2006.04.006}, + url = {https://doi.org/10.1016/j.acha.2006.04.006} +} + +@article{cover1967nearest, + title = {Nearest neighbor pattern classification}, + author = {T. Cover and P. 
Hart}, + year = {1967}, + month = {Jan}, + journal = {{IEEE} Transactions on Information Theory}, + publisher = {Institute of Electrical and Electronics Engineers ({IEEE})}, + volume = {13}, + number = {1}, + pages = {21--27}, + doi = {10.1109/tit.1967.1053964}, + url = {https://doi.org/10.1109/tit.1967.1053964} +} + +@inproceedings{davis2006prauc, + title = {The relationship between Precision-Recall and {ROC} curves}, + author = {Jesse Davis and Mark Goadrich}, + year = {2006}, + booktitle = {Proceedings of the 23rd international conference on Machine learning - {ICML} {\textquotesingle}06}, + publisher = {{ACM} Press}, + doi = {10.1145/1143844.1143874}, + url = {https://doi.org/10.1145/1143844.1143874} +} + +@article{Demetci2020scot, + author = {Pinar Demetci and Rebecca Santorella and Bj{\"o}rn Sandstede and William Stafford Noble and Ritambhara Singh}, + title = {Gromov-Wasserstein optimal transport to align single-cell multi-omics data}, + elocation-id = {2020.04.28.066787}, + year = {2020}, + doi = {10.1101/2020.04.28.066787}, + publisher = {Cold Spring Harbor Laboratory}, + url = {https://www.biorxiv.org/content/early/2020/11/11/2020.04.28.066787}, + eprint = {https://www.biorxiv.org/content/early/2020/11/11/2020.04.28.066787.full.pdf}, + journal = {bioRxiv} +} + +@article{dimitrov2022comparison, + title = {Comparison of methods and resources for cell-cell communication inference from single-cell {RNA}-Seq data}, + author = {Daniel Dimitrov and D{\'{e}}nes T\"{u}rei and Martin Garrido-Rodriguez and Paul L. Burmedi and James S. Nagai and Charlotte Boys and Ricardo O. Ramirez Flores and Hyojin Kim and Bence Szalai and Ivan G. Costa and Alberto Valdeolivas and Aur{\'{e}}lien Dugourd and Julio Saez-Rodriguez}, + year = {2022}, + month = {Jun.}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {13}, + number = {1}, + doi = {10.1038/s41467-022-30755-0}, + url = {https://doi.org/10.1038/s41467-022-30755-0} +} + +@article{donoho2017yearsdatascience, + doi = {10.1080/10618600.2017.1384734}, + url = {https://doi.org/10.1080/10618600.2017.1384734}, + year = {2017}, + month = {Oct.}, + publisher = {Informa {UK} Limited}, + volume = {26}, + number = {4}, + pages = {745--766}, + author = {David Donoho}, + title = {50 Years of Data Science}, + journal = {Journal of Computational and Graphical Statistics} +} + +@article{efremova2020cellphonedb, + title = {{CellPhoneDB}: inferring cell-cell communication from combined expression of multi-subunit ligand-receptor complexes}, + author = {Mirjana Efremova and Miquel Vento-Tormo and Sarah A. 
Teichmann and Roser Vento-Tormo}, + year = {2020}, + month = {Feb.}, + journal = {Nature Protocols}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {4}, + pages = {1484--1506}, + doi = {10.1038/s41596-020-0292-x}, + url = {https://doi.org/10.1038/s41596-020-0292-x} +} + +@article{emmons2016analysis, + title = {Analysis of Network Clustering Algorithms and Cluster Quality Metrics at Scale}, + volume = {11}, + issn = {1932-6203}, + url = {http://dx.doi.org/10.1371/journal.pone.0159161}, + doi = {10.1371/journal.pone.0159161}, + number = {7}, + journal = {PLOS ONE}, + publisher = {Public Library of Science (PLoS)}, + author = {Emmons, Scott and Kobourov, Stephen and Gallant, Mike and B\"{o}rner, Katy}, + editor = {Dovrolis, Constantine}, + year = {2016}, + month = jul, + pages = {e0159161} +} + +@article{eraslan2019single, + title = {Single-cell {RNA}-seq denoising using a deep count autoencoder}, + author = {G\"{o}kcen Eraslan and Lukas M. Simon and Maria Mircea and Nikola S. Mueller and Fabian J. Theis}, + year = {2019}, + month = {Jan}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {10}, + number = {1}, + doi = {10.1038/s41467-018-07931-2}, + url = {https://doi.org/10.1038/s41467-018-07931-2} +} + +@article{fang2022conservation, + title = {Conservation and divergence of cortical cell organization in human and mouse revealed by MERFISH}, + volume = {377}, + issn = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abm1741}, + doi = {10.1126/science.abm1741}, + number = {6601}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Fang, Rongxin and Xia, Chenglong and Close, Jennie L. and Zhang, Meng and He, Jiang and Huang, Zhengkai and Halpern, Aaron R. and Long, Brian and Miller, Jeremy A. and Lein, Ed S. and Zhuang, Xiaowei}, + year = {2022}, + month = jul, + pages = {56-62} +} + +@article{fix1989discriminatory, + doi = {10.2307/1403797}, + url = {https://doi.org/10.2307/1403797}, + year = {1989}, + month = {Dec.}, + publisher = {{JSTOR}}, + volume = {57}, + number = {3}, + pages = {238}, + author = {Evelyn Fix and J. L. Hodges}, + title = {Discriminatory Analysis. Nonparametric Discrimination: Consistency Properties}, + journal = {International Statistical Review / Revue Internationale de Statistique} +} + +@article{gower1975generalized, + title = {Generalized procrustes analysis}, + author = {J. C. Gower}, + year = {1975}, + month = {Mar.}, + journal = {Psychometrika}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {1}, + pages = {33--51}, + doi = {10.1007/bf02291478}, + url = {https://doi.org/10.1007/bf02291478} +} + +@article{grandini2020metrics, + title = {Metrics for Multi-Class Classification: an Overview}, + author = {Grandini, Margherita and Bagli, Enrico and Visani, Giorgio}, + year = {2020}, + journal = {arXiv}, + publisher = {Cornell University}, + doi = {10.48550/arxiv.2008.05756}, + url = {https://arxiv.org/abs/2008.05756}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (stat.ML), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences} +} + +@article{granja2021archr, + title = {{ArchR} is a scalable software package for integrative single-cell chromatin accessibility analysis}, + author = {Jeffrey M. Granja and M. Ryan Corces and Sarah E. Pierce and S. 
Tansu Bagdatli and Hani Choudhry and Howard Y. Chang and William J. Greenleaf}, + year = {2021}, + month = {Feb.}, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {3}, + pages = {403--411}, + doi = {10.1038/s41588-021-00790-6}, + url = {https://doi.org/10.1038/s41588-021-00790-6} +} + +@article{grn2014validation, + title = {Validation of noise models for single-cell transcriptomics}, + author = {Dominic Gr\"{u}n and Lennart Kester and Alexander van Oudenaarden}, + year = {2014}, + month = {Apr.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {11}, + number = {6}, + pages = {637--640}, + doi = {10.1038/nmeth.2930}, + url = {https://doi.org/10.1038/nmeth.2930} +} + +@article{haghverdi2018batch, + title = {Batch effects in single-cell {RNA}-sequencing data are corrected by matching mutual nearest neighbors}, + author = {Laleh Haghverdi and Aaron T L Lun and Michael D Morgan and John C Marioni}, + year = {2018}, + month = {Apr.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {36}, + number = {5}, + pages = {421--427}, + doi = {10.1038/nbt.4091}, + url = {https://doi.org/10.1038/nbt.4091} +} + +@article{hammarlund2018cengen, + title = {The {CeNGEN} Project: The Complete Gene Expression Map of an Entire Nervous System}, + author = {Marc Hammarlund and Oliver Hobert and David M. Miller and Nenad Sestan}, + year = {2018}, + month = {Aug.}, + journal = {Neuron}, + publisher = {Elsevier {BV}}, + volume = {99}, + number = {3}, + pages = {430--433}, + doi = {10.1016/j.neuron.2018.07.042}, + url = {https://doi.org/10.1016/j.neuron.2018.07.042} +} + +@article{hansen2012removing, + title = {Adjusting batch effects in microarray expression data using empirical Bayes methods}, + author = {W. Evan Johnson and Cheng Li and Ariel Rabinovic}, + year = {2006}, + month = {Apr.}, + journal = {Biostatistics}, + publisher = {Oxford University Press ({OUP})}, + volume = {8}, + number = {1}, + pages = {118--127}, + doi = {10.1093/biostatistics/kxj037}, + url = {https://doi.org/10.1093/biostatistics/kxj037} +} + +@article{hao2021integrated, + title = {Integrated analysis of multimodal single-cell data}, + author = {Yuhan Hao and Stephanie Hao and Erica Andersen-Nissen and William M. Mauck and Shiwei Zheng and Andrew Butler and Maddie J. Lee and Aaron J. Wilk and Charlotte Darby and Michael Zager and Paul Hoffman and Marlon Stoeckius and Efthymia Papalexi and Eleni P. Mimitou and Jaison Jain and Avi Srivastava and Tim Stuart and Lamar M. Fleming and Bertrand Yeung and Angela J. Rogers and Juliana M. McElrath and Catherine A. 
Blish and Raphael Gottardo and Peter Smibert and Rahul Satija}, + year = {2021}, + month = {Jun.}, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {184}, + number = {13}, + pages = {3573--3587.e29}, + doi = {10.1016/j.cell.2021.04.048}, + url = {https://doi.org/10.1016/j.cell.2021.04.048} +} + +@article{hao2021somde, + title = {SOMDE: a scalable method for identifying spatially variable genes with self-organizing map}, + author = {Hao, Minsheng and Hua, Kui and Zhang, Xuegong}, + journal = {Bioinformatics}, + volume = {37}, + number = {23}, + pages = {4392--4398}, + year = {2021}, + publisher = {Oxford University Press}, + doi = {10.1093/bioinformatics/btab471} +} + +@article{hie2019efficient, + title = {Efficient integration of heterogeneous single-cell transcriptomes using Scanorama}, + author = {Brian Hie and Bryan Bryson and Bonnie Berger}, + year = {2019}, + month = {May}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {37}, + number = {6}, + pages = {685--691}, + doi = {10.1038/s41587-019-0113-3}, + url = {https://doi.org/10.1038/s41587-019-0113-3} +} + +@article{hinton1989connectionist, + title = {Connectionist learning procedures}, + author = {Geoffrey E. Hinton}, + year = {1989}, + month = {Sep}, + journal = {Artificial Intelligence}, + publisher = {Elsevier {BV}}, + volume = {40}, + number = {1-3}, + pages = {185--234}, + doi = {10.1016/0004-3702(89)90049-0}, + url = {https://doi.org/10.1016/0004-3702(89)90049-0} +} + +@book{hosmer2013applied, + title = {Applied logistic regression}, + author = {Hosmer Jr, D.W. and Lemeshow, S. and Sturdivant, R.X.}, + year = {2013}, + publisher = {John Wiley \& Sons}, + volume = {398} +} + +@article{hou2019scmatch, + title = {{scMatch}: a single-cell gene expression profile annotation tool using reference datasets}, + author = {Rui Hou and Elena Denisenko and Alistair R R Forrest}, + year = {2019}, + month = {Apr.}, + journal = {Bioinformatics}, + publisher = {Oxford University Press ({OUP})}, + volume = {35}, + number = {22}, + pages = {4688--4695}, + doi = {10.1093/bioinformatics/btz292}, + url = {https://doi.org/10.1093/bioinformatics/btz292}, + editor = {Janet Kelso} +} + +@article{hou2020predicting, + title = {Predicting cell-to-cell communication networks using {NATMI}}, + author = {Rui Hou and Elena Denisenko and Huan Ting Ong and Jordan A. Ramilowski and Alistair R. R. Forrest}, + year = {2020}, + month = {Oct.}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {11}, + number = {1}, + doi = {10.1038/s41467-020-18873-z}, + url = {https://doi.org/10.1038/s41467-020-18873-z} +} + +@article{hou2020systematic, + title = {A systematic evaluation of single-cell {RNA}-sequencing imputation methods}, + author = {Wenpin Hou and Zhicheng Ji and Hongkai Ji and Stephanie C. 
Hicks}, + year = {2020}, + month = {Aug.}, + journal = {Genome Biology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {21}, + number = {1}, + doi = {10.1186/s13059-020-02132-x}, + url = {https://doi.org/10.1186/s13059-020-02132-x} +} + +@article{hubert1985comparing, + doi = {10.1007/bf01908075}, + url = {https://doi.org/10.1007/bf01908075}, + year = {1985}, + month = {Dec.}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {2}, + number = {1}, + pages = {193--218}, + author = {Lawrence Hubert and Phipps Arabie}, + title = {Comparing partitions}, + journal = {Journal of Classification} +} + +@article{hu2021spagcn, + title = {SpaGCN: Integrating gene expression, spatial location and histology to identify spatial domains and spatially variable genes by graph convolutional network}, + author = {Hu, Jian and Li, Xiangjie and Coleman, Kyle and Schroeder, Amelia and Ma, Nan and Irwin, David J and Lee, Edward B and Shinohara, Russell T and Li, Mingyao}, + journal = {Nature methods}, + volume = {18}, + number = {11}, + pages = {1342--1351}, + year = {2021}, + publisher = {Nature Publishing Group US New York}, + doi = {10.1038/s41592-021-01255-8} +} + +@article{kats2021spatialde2, + title = {SpatialDE2: fast and localized variance component analysis of spatial transcriptomics}, + author = {Kats, Ilia and Vento-Tormo, Roser and Stegle, Oliver}, + journal = {bioRxiv}, + pages = {2021--10}, + year = {2021}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2021.10.27.466045} +} + +@article{kendall1938new, + doi = {10.1093/biomet/30.1-2.81}, + url = {https://doi.org/10.1093/biomet/30.1-2.81}, + year = {1938}, + month = {Jun.}, + publisher = {Oxford University Press ({OUP})}, + volume = {30}, + number = {1-2}, + pages = {81--93}, + author = {M. G. Kendall}, + title = {A new measure of rank correlation}, + journal = {Biometrika} +} + +@article{kiselev2019challenges, + title = {Challenges in unsupervised clustering of single-cell {RNA}-seq data}, + author = {Vladimir Yu Kiselev and Tallulah S. Andrews and Martin Hemberg}, + year = {2019}, + month = {Jan}, + journal = {Nature Reviews Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {20}, + number = {5}, + pages = {273--282}, + doi = {10.1038/s41576-018-0088-9}, + url = {https://doi.org/10.1038/s41576-018-0088-9} +} + +@article{kleshchevnikov2022cell2location, + title = {Cell2location maps fine-grained cell types in spatial transcriptomics}, + author = {Vitalii Kleshchevnikov and Artem Shmatko and Emma Dann and Alexander Aivazidis and Hamish W. 
King and Tong Li and Rasa Elmentaite and Artem Lomakin and Veronika Kedlian and Adam Gayoso and Mika Sarkin Jain and Jun Sung Park and Lauma Ramona and Elizabeth Tuck and Anna Arutyunyan and Roser Vento-Tormo and Moritz Gerstung and Louisa James and Oliver Stegle and Omer Ali Bayraktar}, + year = {2022}, + month = {Jan}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {5}, + pages = {661--671}, + doi = {10.1038/s41587-021-01139-4}, + url = {https://doi.org/10.1038/s41587-021-01139-4} +} + +@article{korsunsky2019fast, + title = {Fast, sensitive and accurate integration of single-cell data with Harmony}, + author = {Ilya Korsunsky and Nghia Millard and Jean Fan and Kamil Slowikowski and Fan Zhang and Kevin Wei and Yuriy Baglaenko and Michael Brenner and Po-ru Loh and Soumya Raychaudhuri}, + year = {2019}, + month = {Nov.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {12}, + pages = {1289--1296}, + doi = {10.1038/s41592-019-0619-0}, + url = {https://doi.org/10.1038/s41592-019-0619-0} +} + +@article{kraemer2018dimred, + title = {{dimRed} and {coRanking} - Unifying Dimensionality Reduction in R}, + author = {Guido Kraemer and Markus Reichstein and Miguel, D. Mahecha}, + year = {2018}, + journal = {The R Journal}, + publisher = {The R Foundation}, + volume = {10}, + number = {1}, + pages = {342}, + doi = {10.32614/rj-2018-039}, + url = {https://doi.org/10.32614/rj-2018-039} +} + +@article{kruskal1964mds, + title = {Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis}, + author = {J. B. Kruskal}, + year = {1964}, + month = {Mar.}, + journal = {Psychometrika}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {29}, + number = {1}, + pages = {1--27}, + doi = {10.1007/bf02289565}, + url = {https://doi.org/10.1007/bf02289565} +} + +@article{kuppe2022spatial, + title = {Spatial multi-omic map of human myocardial infarction}, + author = {Kuppe, Christoph and Ramirez Flores, Ricardo O and Li, Zhijian and Hayat, Sikander and Levinson, Rebecca T and Liao, Xian and Hannani, Monica T and Tanevski, Jovan and W{\"u}nnemann, Florian and Nagai, James S and others}, + journal = {Nature}, + volume = {608}, + number = {7924}, + pages = {766--777}, + year = {2022}, + publisher = {Nature Publishing Group UK London} +} + +@article{lance2022multimodal, + title = {Multimodal single cell data integration challenge: results and lessons learned}, + author = {Lance, Christopher and Luecken, Malte D. and Burkhardt, Daniel B. 
and Cannoodt, Robrecht and Rautenstrauch, Pia and Laddach, Anna and Ubingazhibov, Aidyn and Cao, Zhi-Jie and Deng, Kaiwen and Khan, Sumeer and Liu, Qiao and Russkikh, Nikolay and Ryazantsev, Gleb and Ohler, Uwe and , and Pisco, Angela Oliveira and Bloom, Jonathan and Krishnaswamy, Smita and Theis, Fabian J.}, + year = {2022}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2022.04.11.487796}, + url = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796}, + elocation-id = {2022.04.11.487796}, + eprint = {https://www.biorxiv.org/content/early/2022/04/12/2022.04.11.487796.full.pdf} +} + +@article{lance2024predicting, + title = {Predicting cellular profiles across modalities in longitudinal single-cell data: An Open Problems competition}, + author = {...}, + year = {2024}, + journal = {In preparation} +} + +@book{lawson1995solving, + title = {Solving Least Squares Problems}, + author = {Charles L. Lawson and Richard J. Hanson}, + year = {1995}, + month = {Jan}, + publisher = {Society for Industrial and Applied Mathematics}, + doi = {10.1137/1.9781611971217}, + url = {https://doi.org/10.1137/1.9781611971217} +} + +@article{lee2009quality, + title = {Quality assessment of dimensionality reduction: Rank-based criteria}, + author = {John A. Lee and Michel Verleysen}, + year = {2009}, + month = {Mar.}, + journal = {Neurocomputing}, + publisher = {Elsevier {BV}}, + volume = {72}, + number = {7-9}, + pages = {1431--1443}, + doi = {10.1016/j.neucom.2008.12.017}, + url = {https://doi.org/10.1016/j.neucom.2008.12.017} +} + +@article{li2021bayesian, + author = {Li, Qiwei and Zhang, Minzhe and Xie, Yang and Xiao, Guanghua}, + title = {{Bayesian modeling of spatial molecular profiling data via Gaussian process}}, + journal = {Bioinformatics}, + volume = {37}, + number = {22}, + pages = {4129-4136}, + year = {2021}, + month = {06}, + abstract = {{The location, timing and abundance of gene expression (both mRNA and proteins) within a tissue define the molecular mechanisms of cell functions. Recent technology breakthroughs in spatial molecular profiling, including imaging-based technologies and sequencing-based technologies, have enabled the comprehensive molecular characterization of single cells while preserving their spatial and morphological contexts. This new bioinformatics scenario calls for effective and robust computational methods to identify genes with spatial patterns.We represent a novel Bayesian hierarchical model to analyze spatial transcriptomics data, with several unique characteristics. It models the zero-inflated and over-dispersed counts by deploying a zero-inflated negative binomial model that greatly increases model stability and robustness. Besides, the Bayesian inference framework allows us to borrow strength in parameter estimation in a de novo fashion. As a result, the proposed model shows competitive performances in accuracy and robustness over existing methods in both simulation studies and two real data applications.The related R/C++ source code is available at https://github.com/Minzhe/BOOST-GP.Supplementary data are available at Bioinformatics online. }}, + issn = {1367-4803}, + doi = {10.1093/bioinformatics/btab455}, + url = {https://doi.org/10.1093/bioinformatics/btab455}, + eprint = {https://academic.oup.com/bioinformatics/article-pdf/37/22/4129/50335106/btab455.pdf} +} + +@article{linderman2018zero, + title = {Zero-preserving imputation of scRNA-seq data using low-rank approximation}, + author = {Linderman, George C. 
and Zhao, Jun and Kluger, Yuval}, + year = {2018}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/397588}, + url = {https://www.biorxiv.org/content/early/2018/08/22/397588}, + elocation-id = {397588}, + eprint = {https://www.biorxiv.org/content/early/2018/08/22/397588.full.pdf} +} + +@article{liu2020high, + title = {High-Spatial-Resolution Multi-Omics Sequencing via Deterministic Barcoding in Tissue}, + volume = {183}, + issn = {0092-8674}, + url = {http://dx.doi.org/10.1016/j.cell.2020.10.026}, + doi = {10.1016/j.cell.2020.10.026}, + number = {6}, + journal = {Cell}, + publisher = {Elsevier BV}, + author = {Liu, Yang and Yang, Mingyu and Deng, Yanxiang and Su, Graham and Enninful, Archibald and Guo, Cindy C. and Tebaldi, Toma and Zhang, Di and Kim, Dongjoo and Bai, Zhiliang and Norris, Eileen and Pan, Alisia and Li, Jiatong and Xiao, Yang and Halene, Stephanie and Fan, Rong}, + year = {2020}, + month = dec, + pages = {1665--1681.e18} +} + +@article{lohoff2021integration, + title = {Integration of spatial and single-cell transcriptomic data elucidates mouse organogenesis}, + volume = {40}, + issn = {1546-1696}, + url = {http://dx.doi.org/10.1038/s41587-021-01006-2}, + doi = {10.1038/s41587-021-01006-2}, + number = {1}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media LLC}, + author = {Lohoff, T. and Ghazanfar, S. and Missarova, A. and Koulena, N. and Pierson, N. and Griffiths, J. A. and Bardot, E. S. and Eng, C.-H. L. and Tyser, R. C. V. and Argelaguet, R. and Guibentif, C. and Srinivas, S. and Briscoe, J. and Simons, B. D. and Hadjantonakis, A.-K. and G\"{o}ttgens, B. and Reik, W. and Nichols, J. and Cai, L. and Marioni, J. C.}, + year = {2021}, + month = sep, + pages = {74-85} +} + +@article{lopez2018deep, + title = {Deep generative modeling for single-cell transcriptomics}, + author = {Romain Lopez and Jeffrey Regier and Michael B. Cole and Michael I. Jordan and Nir Yosef}, + year = {2018}, + month = {Nov.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {15}, + number = {12}, + pages = {1053--1058}, + doi = {10.1038/s41592-018-0229-2}, + url = {https://doi.org/10.1038/s41592-018-0229-2} +} + +@article{lopez2022destvi, + title = {{DestVI} identifies continuums of cell types in spatial transcriptomics data}, + author = {Romain Lopez and Baoguo Li and Hadas Keren-Shaul and Pierre Boyeau and Merav Kedmi and David Pilzer and Adam Jelinski and Ido Yofe and Eyal David and Allon Wagner and Can Ergen and Yoseph Addadi and Ofra Golani and Franca Ronchese and Michael I. Jordan and Ido Amit and Nir Yosef}, + year = {2022}, + month = {Apr.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {40}, + number = {9}, + pages = {1360--1369}, + doi = {10.1038/s41587-022-01272-8}, + url = {https://doi.org/10.1038/s41587-022-01272-8} +} + +@article{lotfollahi2020query, + title = {Query to reference single-cell integration with transfer learning}, + author = {Lotfollahi, Mohammad and Naghipourfar, Mohsen and Luecken, Malte D. and Khajavi, Matin and B{\"u}ttner, Maren and Avsec, Ziga and Misharin, Alexander V. 
and Theis, Fabian J.}, + year = {2020}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/2020.07.16.205997}, + url = {https://doi.org/10.1101/2020.07.16.205997}, + elocation-id = {2020.07.16.205997}, + eprint = {https://www.biorxiv.org/content/early/2020/07/16/2020.07.16.205997.full.pdf} +} + +@article{luecken2022benchmarking, + title = {Benchmarking atlas-level data integration in single-cell genomics}, + author = {Malte D. Luecken and M. B\"{u}ttner and K. Chaichoompu and A. Danese and M. Interlandi and M. F. Mueller and D. C. Strobl and L. Zappia and M. Dugas and M. Colom{\'{e}}-Tatch{\'{e}} and Fabian J. Theis}, + year = {2021}, + month = {Dec.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {19}, + number = {1}, + pages = {41--50}, + doi = {10.1038/s41592-021-01336-8}, + url = {https://doi.org/10.1038/s41592-021-01336-8} +} + +@article{lueks2011evaluate, + title = {How to Evaluate Dimensionality Reduction? - Improving the Co-ranking Matrix}, + author = {Lueks, Wouter and Mokbel, Bassam and Biehl, Michael and Hammer, Barbara}, + year = {2011}, + journal = {arXiv}, + doi = {10.48550/ARXIV.1110.3917}, + url = {https://arxiv.org/abs/1110.3917}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (cs.LG), Information Retrieval (cs.IR), FOS: Computer and information sciences, FOS: Computer and information sciences} +} + +@misc{lun2019fastmnn, + title = {A description of the theory behind the fastMNN algorithm}, + author = {Lun, Aaron}, + year = {2019}, + url = {https://marionilab.github.io/FurtherMNN2018/theory/description.html} +} + +@article{mcinnes2018umap, + title = {UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction}, + author = {McInnes, Leland and Healy, John and Melville, James}, + year = {2018}, + journal = {arXiv}, + publisher = {Cornell University}, + doi = {10.48550/arxiv.1802.03426}, + url = {https://arxiv.org/abs/1802.03426}, + copyright = {arXiv.org perpetual, non-exclusive license}, + keywords = {Machine Learning (stat.ML), Computational Geometry (cs.CG), Machine Learning (cs.LG), FOS: Computer and information sciences, FOS: Computer and information sciences} +} + +@article{mereu2020benchmarking, + doi = {10.1038/s41587-020-0469-4}, + author = {Mereu, Elisabetta and Lafzi, Atefeh and Moutinho, Catia and Ziegenhain, Christoph and McCarthy, Davis J and Alvarez-Varela, Adrian and Batlle, Eduard and Sagar and Gruen, Dominic and Lau, Julia K and others}, + journal = {Nature biotechnology}, + number = {6}, + pages = {747--755}, + publisher = {Nature Publishing Group US New York}, + title = {Benchmarking single-cell {RNA}-sequencing protocols for cell atlas projects}, + volume = {38}, + year = {2020} +} + +@inbook{miles2005rsquared, + title = "{{R-Squared}, Adjusted {R-Squared}}", + chapter = "{{R-Squared}, Adjusted {R-Squared}}", + author = "Miles, Jeremy", + booktitle = "{Encyclopedia of Statistics in Behavioral Science}", + publisher = "John Wiley \& Sons, Ltd", + address = "Chichester, UK", + month = oct, + year = 2005, + doi = "10.1002/0470013192.bsa526", + isbn = "9780470860809,9780470860809" +} + +@article{moon2019visualizing, + title = {Visualizing structure and transitions in high-dimensional biological data}, + author = {Kevin R. Moon and David van Dijk and Zheng Wang and Scott Gigante and Daniel B. Burkhardt and William S. Chen and Kristina Yim and Antonia van den Elzen and Matthew J. Hirn and Ronald R. 
Coifman and Natalia B. Ivanova and Guy Wolf and Smita Krishnaswamy}, + year = {2019}, + month = {Dec.}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {37}, + number = {12}, + pages = {1482--1492}, + doi = {10.1038/s41587-019-0336-3}, + url = {https://doi.org/10.1038/s41587-019-0336-3} +} + +@article{narayan2021assessing, + title = {Assessing single-cell transcriptomic variability through density-preserving data visualization}, + author = {Ashwin Narayan and Bonnie Berger and Hyunghoon Cho}, + year = {2021}, + month = {Jan}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {39}, + number = {6}, + pages = {765--774}, + doi = {10.1038/s41587-020-00801-7}, + url = {https://doi.org/10.1038/s41587-020-00801-7} +} + +@article{nestorowa2016single, + title = {A single-cell resolution map of mouse hematopoietic stem and progenitor cell differentiation}, + author = {Sonia Nestorowa and Fiona K. Hamey and Blanca Pijuan Sala and Evangelia Diamanti and Mairi Shepherd and Elisa Laurenti and Nicola K. Wilson and David G. Kent and Berthold G\"{o}ttgens}, + year = {2016}, + month = {Aug.}, + journal = {Blood}, + publisher = {American Society of Hematology}, + volume = {128}, + number = {8}, + pages = {e20--e31}, + doi = {10.1182/blood-2016-05-716480}, + url = {https://doi.org/10.1182/blood-2016-05-716480} +} + +@inproceedings{luecken2021neurips, + author = {Luecken, Malte and Burkhardt, Daniel and Cannoodt, Robrecht and Lance, Christopher and Agrawal, Aditi and Aliee, Hananeh and Chen, Ann and Deconinck, Louise and Detweiler, Angela and Granados, Alejandro and Huynh, Shelly and Isacco, Laura and Kim, Yang and Klein, Dominik and De Kumar, Bony and Kuppasani, Sunil and Lickert, Heiko and McGeever, Aaron and Melgarejo, Joaquin and Mekonen, Honey and Morri, Maurizio and M\"{u}ller, Michaela and Neff, Norma and Paul, Sheryl and Rieck, Bastian and Schneider, Kaylie and Steelman, Scott and Sterr, Michael and Treacy, Daniel and Tong, Alexander and Villani, Alexandra-Chloe and Wang, Guilin and Yan, Jia and Zhang, Ce and Pisco, Angela and Krishnaswamy, Smita and Theis, Fabian and Bloom, Jonathan M}, + booktitle = {Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks}, + editor = {J. Vanschoren and S. Yeung}, + publisher = {Curran}, + title = {A sandbox for prediction and integration of DNA, RNA, and proteins in single cells}, + url = {https://datasets-benchmarks-proceedings.neurips.cc/paper_files/paper/2021/file/158f3069a435b314a80bdcb024f8e422-Paper-round2.pdf}, + volume = {1}, + year = {2021} +} + +@article{olsson2016single, + title = {Single-cell analysis of mixed-lineage states leading to a binary cell fate choice}, + author = {Andre Olsson and Meenakshi Venkatasubramanian and Viren K. Chaudhri and Bruce J. Aronow and Nathan Salomonis and Harinder Singh and H. 
Leighton Grimes}, + year = {2016}, + month = {Aug.}, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {537}, + number = {7622}, + pages = {698--702}, + doi = {10.1038/nature19348}, + url = {https://doi.org/10.1038/nature19348} +} + +@misc{openproblems, + title = {Open Problems}, + author = {{Open Problems for Single Cell Analysis Consortium}}, + year = {2022}, + url = {https://openproblems.bio} +} + +@article{palla2022squidpy, + title = {Squidpy: a scalable framework for spatial omics analysis}, + author = {Palla, Giovanni and Spitzer, Hannah and Klein, Michal and Fischer, David and Schaar, Anna Christina and Kuemmerle, Louis Benedikt and Rybakov, Sergei and Ibarra, Ignacio L and Holmberg, Olle and Virshup, Isaac and others}, + journal = {Nature methods}, + volume = {19}, + number = {2}, + pages = {171--178}, + year = {2022}, + publisher = {Nature Publishing Group US New York}, + doi = {10.1038/s41592-021-01358-2} +} + +@article{pearson1895regression, + doi = {10.1098/rspl.1895.0041}, + title = {VII. Note on regression and inheritance in the case of two parents}, + author = {Pearson, Karl}, + journal = {proceedings of the royal society of London}, + volume = {58}, + number = {347-352}, + pages = {240--242}, + year = {1895}, + publisher = {The Royal Society London} +} + +@article{pearson1901pca, + title = {On lines and planes of closest fit to systems of points in space}, + author = {Karl Pearson}, + year = {1901}, + month = {Nov.}, + journal = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science}, + publisher = {Informa {UK} Limited}, + volume = {2}, + number = {11}, + pages = {559--572}, + doi = {10.1080/14786440109462720}, + url = {https://doi.org/10.1080/14786440109462720} +} + +@article{pliner2019supervised, + title = {Supervised classification enables rapid annotation of cell atlases}, + author = {Hannah A. Pliner and Jay Shendure and Cole Trapnell}, + year = {2019}, + month = {Sep}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {10}, + pages = {983--986}, + doi = {10.1038/s41592-019-0535-3}, + url = {https://doi.org/10.1038/s41592-019-0535-3} +} + +@article{polanski2020bbknn, + title = {{BBKNN}: fast batch alignment of single cell transcriptomes}, + author = {Krzysztof Pola{\'{n}}ski and Matthew D Young and Zhichao Miao and Kerstin B Meyer and Sarah A Teichmann and Jong-Eun Park}, + year = {2019}, + month = {Aug.}, + journal = {Bioinformatics}, + publisher = {Oxford University Press ({OUP})}, + doi = {10.1093/bioinformatics/btz625}, + url = {https://doi.org/10.1093/bioinformatics/btz625}, + editor = {Bonnie Berger} +} + +@article{raredon2022computation, + title = {Computation and visualization of cell-cell signaling topologies in single-cell systems data using Connectome}, + author = {Micha Sam Brickman Raredon and Junchen Yang and James Garritano and Meng Wang and Dan Kushnir and Jonas Christian Schupp and Taylor S. Adams and Allison M. Greaney and Katherine L. Leiby and Naftali Kaminski and Yuval Kluger and Andre Levchenko and Laura E. 
Niklason}, + year = {2022}, + month = {Mar.}, + journal = {Scientific Reports}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {12}, + number = {1}, + doi = {10.1038/s41598-022-07959-x}, + url = {https://doi.org/10.1038/s41598-022-07959-x} +} + +@article{rodriques2019slide, + title = {Slide-seq: A scalable technology for measuring genome-wide expression at high spatial resolution}, + author = {Samuel G. Rodriques and Robert R. Stickels and Aleksandrina Goeva and Carly A. Martin and Evan Murray and Charles R. Vanderburg and Joshua Welch and Linlin M. Chen and Fei Chen and Evan Z. Macosko}, + year = {2019}, + month = {Mar.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {363}, + number = {6434}, + pages = {1463--1467}, + doi = {10.1126/science.aaw1219}, + url = {https://doi.org/10.1126/science.aaw1219} +} + +@article{russell2023slide, + title = {Slide-tags enables single-nucleus barcoding for multimodal spatial genomics}, + volume = {625}, + issn = {1476-4687}, + url = {http://dx.doi.org/10.1038/s41586-023-06837-4}, + doi = {10.1038/s41586-023-06837-4}, + number = {7993}, + journal = {Nature}, + publisher = {Springer Science and Business Media LLC}, + author = {Russell, Andrew J. C. and Weir, Jackson A. and Nadaf, Naeem M. and Shabet, Matthew and Kumar, Vipin and Kambhampati, Sandeep and Raichur, Ruth and Marrero, Giovanni J. and Liu, Sophia and Balderrama, Karol S. and Vanderburg, Charles R. and Shanmugam, Vignesh and Tian, Luyi and Iorgulescu, J. Bryan and Yoon, Charles H. and Wu, Catherine J. and Macosko, Evan Z. and Chen, Fei}, + year = {2023}, + month = dec, + pages = {101--109} +} + +@inproceedings{santos2009on, + author = {Santos, Jorge M. and Embrechts, Mark}, + editor = {Alippi, Cesare and Polycarpou, Marios and Panayiotou, Christos and Ellinas, Georgios}, + title = {On the Use of the Adjusted Rand Index as a Metric for Evaluating Supervised Classification}, + booktitle = {Artificial Neural Networks -- ICANN 2009}, + year = {2009}, + publisher = {Springer Berlin Heidelberg}, + address = {Berlin, Heidelberg}, + pages = {175--184}, + isbn = {978-3-642-04277-5}, + doi = {10.1007/978-3-642-04277-5_18}, + url = {https://doi.org/10.1007/978-3-642-04277-5_18} +} + +@article{sarkar2021separating, + title = {Separating measurement and expression models clarifies confusion in single-cell {RNA} sequencing analysis}, + author = {Abhishek Sarkar and Matthew Stephens}, + year = {2021}, + month = {May}, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {6}, + pages = {770--777}, + doi = {10.1038/s41588-021-00873-4}, + url = {https://doi.org/10.1038/s41588-021-00873-4} +} + +@article{schober2018correlation, + title = {Correlation Coefficients}, + author = {Patrick Schober and Christa Boer and Lothar A. Schwarte}, + year = {2018}, + month = {May}, + journal = {Anesthesia {\&} Analgesia}, + publisher = {Ovid Technologies (Wolters Kluwer Health)}, + volume = {126}, + number = {5}, + pages = {1763--1768}, + doi = {10.1213/ane.0000000000002864}, + url = {https://doi.org/10.1213/ane.0000000000002864} +} + +@inproceedings{stanley2020harmonic, + title = {Harmonic Alignment}, + author = {Jay S. 
Stanley and Scott Gigante and Guy Wolf and Smita Krishnaswamy}, + year = {2020}, + month = {Jan}, + booktitle = {Proceedings of the 2020 {SIAM} International Conference on Data Mining}, + publisher = {Society for Industrial and Applied Mathematics}, + pages = {316--324}, + doi = {10.1137/1.9781611976236.36}, + url = {https://doi.org/10.1137/1.9781611976236.36} +} + +@article{stickels2020highly, + title = {Highly sensitive spatial transcriptomics at near-cellular resolution with Slide-seqV2}, + volume = {39}, + issn = {1546-1696}, + url = {http://dx.doi.org/10.1038/s41587-020-0739-1}, + doi = {10.1038/s41587-020-0739-1}, + number = {3}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media LLC}, + author = {Stickels, Robert R. and Murray, Evan and Kumar, Pawan and Li, Jilong and Marshall, Jamie L. and Di Bella, Daniela J. and Arlotta, Paola and Macosko, Evan Z. and Chen, Fei}, + year = {2020}, + month = dec, + pages = {313–319} +} + +@article{stoeckius2017simultaneous, + title = {Simultaneous epitope and transcriptome measurement in single cells}, + author = {Marlon Stoeckius and Christoph Hafemeister and William Stephenson and Brian Houck-Loomis and Pratip K Chattopadhyay and Harold Swerdlow and Rahul Satija and Peter Smibert}, + year = {2017}, + month = {Jul.}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {14}, + number = {9}, + pages = {865--868}, + doi = {10.1038/nmeth.4380}, + url = {https://doi.org/10.1038/nmeth.4380} +} + +@article{stuart2019comprehensive, + title = {Comprehensive Integration of Single-Cell Data}, + author = {Stuart, T. and Butler, A. and Hoffman, P. and Hafemeister, C. and Papalexi, E. and Mauck, W.M. and Hao, Y. and Stoeckius, M. and Smibert, P. and Satija, R.}, + year = {2019}, + journal = {Cell}, + volume = {177}, + number = {7}, + pages = {1888--1902.e21}, + doi = {10.1016/j.cell.2019.05.031} +} + +@article{sun2020statistical, + title = {Statistical analysis of spatial expression patterns for spatially resolved transcriptomic studies}, + author = {Sun, Shiquan and Zhu, Jiaqiang and Zhou, Xiang}, + journal = {Nature methods}, + volume = {17}, + number = {2}, + pages = {193--200}, + year = {2020}, + publisher = {Nature Publishing Group US New York}, + doi = {10.1038/s41592-019-0701-7} +} + +@article{svensson2018spatialde, + title = {SpatialDE: identification of spatially variable genes}, + author = {Svensson, Valentine and Teichmann, Sarah A and Stegle, Oliver}, + journal = {Nature methods}, + volume = {15}, + number = {5}, + pages = {343--346}, + year = {2018}, + publisher = {Nature Publishing Group}, + doi = {10.1038/nmeth.4636} +} + +@article{szubert2019structurepreserving, + title = {Structure-preserving visualisation of high dimensional single-cell datasets}, + author = {Benjamin Szubert and Jennifer E. 
Cole and Claudia Monaco and Ignat Drozdov}, + year = {2019}, + month = {Jun.}, + journal = {Scientific Reports}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {9}, + number = {1}, + doi = {10.1038/s41598-019-45301-0}, + url = {https://doi.org/10.1038/s41598-019-45301-0} +} + +@article{tabula2018single, + title = {Single-cell transcriptomics of 20 mouse organs creates a Tabula Muris}, + author = {{Tabula Muris Consortium}}, + year = {2018}, + month = {Oct.}, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {562}, + number = {7727}, + pages = {367--372}, + doi = {10.1038/s41586-018-0590-4}, + url = {https://doi.org/10.1038/s41586-018-0590-4} +} + +@article{tabula2020single, + title = {A single-cell transcriptomic atlas characterizes ageing tissues in the mouse}, + author = {{Tabula Muris Consortium}}, + year = {2020}, + month = {Jul.}, + journal = {Nature}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {583}, + number = {7817}, + pages = {590--595}, + doi = {10.1038/s41586-020-2496-1}, + url = {https://doi.org/10.1038/s41586-020-2496-1} +} + +@article{tasic2016adult, + title = {Adult mouse cortical cell taxonomy revealed by single cell transcriptomics}, + author = {Bosiljka Tasic and Vilas Menon and Thuc Nghi Nguyen and Tae Kyung Kim and Tim Jarsky and Zizhen Yao and Boaz Levi and Lucas T Gray and Staci A Sorensen and Tim Dolbeare and Darren Bertagnolli and Jeff Goldy and Nadiya Shapovalova and Sheana Parry and Changkyu Lee and Kimberly Smith and Amy Bernard and Linda Madisen and Susan M Sunkin and Michael Hawrylycz and Christof Koch and Hongkui Zeng}, + year = {2016}, + month = {Jan}, + journal = {Nature Neuroscience}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {19}, + number = {2}, + pages = {335--346}, + doi = {10.1038/nn.4216}, + url = {https://doi.org/10.1038/nn.4216} +} + +@article{tian2019benchmarking, + title = {Benchmarking single cell {RNA}-sequencing analysis pipelines using mixture control experiments}, + author = {Luyi Tian and Xueyi Dong and Saskia Freytag and Kim-Anh L{\^{e}} Cao and Shian Su and Abolfazl JalalAbadi and Daniela Amann-Zalcenstein and Tom S. Weber and Azadeh Seidi and Jafar S. Jabbari and Shalin H. Naik and Matthew E. Ritchie}, + year = {2019}, + month = {May}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {16}, + number = {6}, + pages = {479--487}, + doi = {10.1038/s41592-019-0425-8}, + url = {https://doi.org/10.1038/s41592-019-0425-8} +} + +@article{tran2020benchmark, + doi = {10.1186/s13059-019-1850-9}, + url = {https://doi.org/10.1186/s13059-019-1850-9}, + year = {2020}, + month = {Jan}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {21}, + number = {1}, + author = {Hoa Thi Nhu Tran and Kok Siong Ang and Marion Chevrier and Xiaomeng Zhang and Nicole Yee Shin Lee and Michelle Goh and Jinmiao Chen}, + title = {A benchmark of batch-effect correction methods for single-cell {RNA} sequencing data}, + journal = {Genome Biology} +} + +@article{van2018recovering, + title = {Recovering Gene Interactions from Single-Cell Data Using Data Diffusion}, + author = {David van Dijk and Roshan Sharma and Juozas Nainys and Kristina Yim and Pooja Kathail and Ambrose J. Carr and Cassandra Burdziak and Kevin R. Moon and Christine L. 
Chaffer and Diwakar Pattabiraman and Brian Bierie and Linas Mazutis and Guy Wolf and Smita Krishnaswamy and Dana Pe'er}, + year = {2018}, + month = {Jul.}, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {174}, + number = {3}, + pages = {716--729.e27}, + doi = {10.1016/j.cell.2018.05.061}, + url = {https://doi.org/10.1016/j.cell.2018.05.061} +} + +@article{vandermaaten2008visualizing, + title = {Visualizing Data using t-SNE}, + author = {{van der} Maaten, Laurens and Hinton, Geoffrey}, + year = {2008}, + journal = {Journal of Machine Learning Research}, + volume = {9}, + number = {86}, + pages = {2579--2605}, + url = {http://jmlr.org/papers/v9/vandermaaten08a.html} +} + +@inproceedings{venna2001neighborhood, + title = {Neighborhood Preservation in Nonlinear Projection Methods: An Experimental Study}, + author = {Jarkko Venna and Samuel Kaski}, + year = {2001}, + booktitle = {Artificial Neural Networks {\textemdash} {ICANN} 2001}, + publisher = {Springer Berlin Heidelberg}, + pages = {485--491}, + doi = {{10.1007/3-540-44668-0_68}}, + url = {{https://doi.org/10.1007/3-540-44668-0_68}} +} + +@article{venna2006local, + title = {Local multidimensional scaling}, + author = {Jarkko Venna and Samuel Kaski}, + year = {2006}, + month = {Jul.}, + journal = {Neural Networks}, + publisher = {Elsevier {BV}}, + volume = {19}, + number = {6-7}, + pages = {889--899}, + doi = {10.1016/j.neunet.2006.05.014}, + url = {https://doi.org/10.1016/j.neunet.2006.05.014} +} + +@article{virshup2021anndataannotateddata, + title = {{anndata: Annotated data}}, + author = {Virshup, Isaac and Rybakov, Sergei and Theis, Fabian J and + Angerer, Philipp and Alexander Wolf, F}, + journal = {bioRxiv}, + pages = {2021.12.16.473007}, + month = dec, + year = 2021, + doi = {10.1101/2021.12.16.473007}, + language = {en} +} + +@article{wagner2018knearest, + title = {K-nearest neighbor smoothing for high-throughput single-cell RNA-Seq data}, + author = {Wagner, Florian and Yan, Yun and Yanai, Itai}, + year = {2018}, + journal = {bioRxiv}, + publisher = {Cold Spring Harbor Laboratory}, + doi = {10.1101/217737}, + url = {https://www.biorxiv.org/content/early/2018/04/09/217737}, + elocation-id = {217737}, + eprint = {https://www.biorxiv.org/content/early/2018/04/09/217737.full.pdf} +} + +@article{wagner2018single, + title = {Single-cell mapping of gene expression landscapes and lineage in the zebrafish embryo}, + author = {Daniel E. Wagner and Caleb Weinreb and Zach M. Collins and James A. Briggs and Sean G. Megason and Allon M. 
Klein}, + year = {2018}, + month = {Jun.}, + journal = {Science}, + publisher = {American Association for the Advancement of Science ({AAAS})}, + volume = {360}, + number = {6392}, + pages = {981--987}, + doi = {10.1126/science.aar4362}, + url = {https://doi.org/10.1126/science.aar4362} +} + +@article{wang2013target, + title = {Target analysis by integration of transcriptome and {ChIP}-seq data with {BETA}}, + author = {Su Wang and Hanfei Sun and Jian Ma and Chongzhi Zang and Chenfei Wang and Juan Wang and Qianzi Tang and Clifford A Meyer and Yong Zhang and X Shirley Liu}, + year = {2013}, + month = {Nov.}, + journal = {Nature Protocols}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {8}, + number = {12}, + pages = {2502--2515}, + doi = {10.1038/nprot.2013.150}, + url = {https://doi.org/10.1038/nprot.2013.150} +} + +@article{wang2017visualization, + title = {Visualization and analysis of single-cell {RNA}-seq data by kernel-based similarity learning}, + volume = {14}, + copyright = {2017 Springer Nature America, Inc.}, + issn = {1548-7105}, + url = {https://www.nature.com/articles/nmeth.4207}, + doi = {10.1038/nmeth.4207}, + abstract = {The SIMLR software identifies similarities between cells across a range of single-cell RNA-seq data, enabling effective dimension reduction, clustering and visualization.}, + language = {en}, + number = {4}, + journal = {Nature Methods}, + author = {Wang, Bo and Zhu, Junjie and Pierson, Emma and Ramazzotti, Daniele and Batzoglou, Serafim}, + month = apr, + year = {2017}, + publisher = {Nature Publishing Group}, + keywords = {Gene expression, Genome informatics, Machine learning, Statistical methods}, + pages = {414--416} +} + +@article{wang2018three, + title = {Three-dimensional intact-tissue sequencing of single-cell transcriptional states}, + volume = {361}, + issn = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.aat5691}, + doi = {10.1126/science.aat5691}, + number = {6400}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Wang, Xiao and Allen, William E. and Wright, Matthew A. and Sylwestrak, Emily L. and Samusik, Nikolay and Vesuna, Sam and Evans, Kathryn and Liu, Cindy and Ramakrishnan, Charu and Liu, Jia and Nolan, Garry P. 
and Bava, Felice-Alessio and Deisseroth, Karl}, + year = {2018}, + month = jul +} + +@article{wang2022high, + title = {High-resolution 3D spatiotemporal transcriptomic maps of developing Drosophila embryos and larvae}, + volume = {57}, + issn = {1534-5807}, + url = {http://dx.doi.org/10.1016/j.devcel.2022.04.006}, + doi = {10.1016/j.devcel.2022.04.006}, + number = {10}, + journal = {Developmental Cell}, + publisher = {Elsevier BV}, + author = {Wang, Mingyue and Hu, Qinan and Lv, Tianhang and Wang, Yuhang and Lan, Qing and Xiang, Rong and Tu, Zhencheng and Wei, Yanrong and Han, Kai and Shi, Chang and Guo, Junfu and Liu, Chao and Yang, Tao and Du, Wensi and An, Yanru and Cheng, Mengnan and Xu, Jiangshan and Lu, Haorong and Li, Wangsheng and Zhang, Shaofang and Chen, Ao and Chen, Wei and Li, Yuxiang and Wang, Xiaoshan and Xu, Xun and Hu, Yuhui and Liu, Longqi}, + year = {2022}, + month = may, + pages = {1271--1283.e4} +} + +@article{weber2023nnsvg, + title = {nnSVG for the scalable identification of spatially variable genes using nearest-neighbor Gaussian processes}, + author = {Weber, Lukas M and Saha, Arkajyoti and Datta, Abhirup and Hansen, Kasper D and Hicks, Stephanie C}, + journal = {Nature communications}, + volume = {14}, + number = {1}, + pages = {4059}, + year = {2023}, + publisher = {Nature Publishing Group UK London}, + doi = {10.1038/s41467-023-39748-z} +} + +@article{welch2019single, + title = {Single-Cell Multi-omic Integration Compares and Contrasts Features of Brain Cell Identity}, + author = {Joshua D. Welch and Velina Kozareva and Ashley Ferreira and Charles Vanderburg and Carly Martin and Evan Z. Macosko}, + year = {2019}, + month = {Jun.}, + journal = {Cell}, + publisher = {Elsevier {BV}}, + volume = {177}, + number = {7}, + pages = {1873--1887.e17}, + doi = {10.1016/j.cell.2019.05.006}, + url = {https://doi.org/10.1016/j.cell.2019.05.006} +} + +@article{wilkinson1973symbolic, + doi = {10.2307/2346786}, + url = {https://doi.org/10.2307/2346786}, + year = {1973}, + publisher = {{JSTOR}}, + volume = {22}, + number = {3}, + pages = {392}, + author = {G. N. Wilkinson and C. E. Rogers}, + title = {Symbolic Description of Factorial Models for Analysis of Variance}, + journal = {Applied Statistics} +} + +@article{wu2021single, + title = {A single-cell and spatially resolved atlas of human breast cancers}, + author = {Sunny Z. Wu and Ghamdan Al-Eryani and Daniel Lee Roden and Simon Junankar and Kate Harvey and Alma Andersson and Aatish Thennavan and Chenfei Wang and James R. Torpy and Nenad Bartonicek and Taopeng Wang and Ludvig Larsson and Dominik Kaczorowski and Neil I. Weisenfeld and Cedric R. Uytingco and Jennifer G. Chew and Zachary W. Bent and Chia-Ling Chan and Vikkitharan Gnanasambandapillai and Charles-Antoine Dutertre and Laurence Gluch and Mun N. Hui and Jane Beith and Andrew Parker and Elizabeth Robbins and Davendra Segara and Caroline Cooper and Cindy Mak and Belinda Chan and Sanjay Warrier and Florent Ginhoux and Ewan Millar and Joseph E. Powell and Stephen R. Williams and X. Shirley Liu and Sandra O'Toole and Elgene Lim and Joakim Lundeberg and Charles M. 
Perou and Alexander Swarbrick}, + year = {2021}, + month = {Sep}, + journal = {Nature Genetics}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {53}, + number = {9}, + pages = {1334--1347}, + doi = {10.1038/s41588-021-00911-1}, + url = {https://doi.org/10.1038/s41588-021-00911-1} +} + +@article{xiong2020neuralee, + title = {{NeuralEE}: A {GPU}-Accelerated Elastic Embedding Dimensionality Reduction Method for Visualizing Large-Scale {scRNA}-Seq Data}, + author = {Jiankang Xiong and Fuzhou Gong and Lin Wan and Liang Ma}, + year = {2020}, + month = {Oct.}, + journal = {Frontiers in Genetics}, + publisher = {Frontiers Media {SA}}, + volume = {11}, + doi = {10.3389/fgene.2020.00786}, + url = {https://doi.org/10.3389/fgene.2020.00786} +} + +@article{xiong2021online, + title = {Online single-cell data integration through projecting heterogeneous datasets into a common cell-embedding space}, + author = {Lei Xiong and Kang Tian and Yuzhe Li and Weixi Ning and Xin Gao and Qiangfeng Cliff Zhang}, + year = {2022}, + month = {Oct.}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {13}, + number = {1}, + doi = {10.1038/s41467-022-33758-z}, + url = {https://doi.org/10.1038/s41467-022-33758-z} +} + +@article{xu2021probabilistic, + title = {Probabilistic harmonization and annotation of single-cell transcriptomics data with deep generative models}, + author = {Chenling Xu and Romain Lopez and Edouard Mehlman and Jeffrey Regier and Michael I Jordan and Nir Yosef}, + year = {2021}, + month = {Jan}, + journal = {Molecular Systems Biology}, + publisher = {{Embo}}, + volume = {17}, + number = {1}, + doi = {10.15252/msb.20209620}, + url = {https://doi.org/10.15252/msb.20209620} +} + +@article{zappia2018exploring, + doi = {10.1371/journal.pcbi.1006245}, + url = {https://doi.org/10.1371/journal.pcbi.1006245}, + year = {2018}, + month = {Jun.}, + publisher = {Public Library of Science ({PLoS})}, + volume = {14}, + number = {6}, + pages = {e1006245}, + author = {Luke Zappia and Belinda Phipson and Alicia Oshlack}, + editor = {Dina Schneidman}, + title = {Exploring the single-cell {RNA}-seq analysis landscape with the {scRNA}-tools database}, + journal = {{PLOS} Computational Biology} +} + +@article{zhang2021pydrmetrics, + title = {{pyDRMetrics} - A Python toolkit for dimensionality reduction quality assessment}, + author = {Yinsheng Zhang and Qian Shang and Guoming Zhang}, + year = {2021}, + month = {Feb.}, + journal = {Heliyon}, + publisher = {Elsevier {BV}}, + volume = {7}, + number = {2}, + pages = {e06199}, + doi = {10.1016/j.heliyon.2021.e06199}, + url = {https://doi.org/10.1016/j.heliyon.2021.e06199} +} + +@article{zhang2022identification, + title = {Identification of spatially variable genes with graph cuts}, + author = {Zhang, Ke and Feng, Wanwan and Wang, Peng}, + journal = {Nature Communications}, + volume = {13}, + number = {1}, + pages = {5488}, + year = {2022}, + publisher = {Nature Publishing Group UK London}, + doi = {10.1038/s41467-022-33182-3} +} + +@article{zhu2021spark, + title = {SPARK-X: non-parametric modeling enables scalable and robust detection of spatial expression patterns for large spatial transcriptomic studies}, + author = {Zhu, Jiaqiang and Sun, Shiquan and Zhou, Xiang}, + journal = {Genome biology}, + volume = {22}, + number = {1}, + pages = {184}, + year = {2021}, + publisher = {Springer}, + doi = {10.1186/s13059-021-02404-0} +} + +@article{hrovatin2023delineating, + author = {Karin Hrovatin and 
Aim{\'e}e Bastidas-Ponce and Mostafa Bakhti and Luke Zappia and Maren B{\"u}ttner and Ciro Sallino and Michael Sterr and Anika B{\"o}ttcher and Adriana Migliorini and Heiko Lickert and Fabian J. Theis}, + title = {Delineating mouse β-cell identity during lifetime and in diabetes with a single cell atlas}, + elocation-id = {2022.12.22.521557}, + year = {2023}, + doi = {10.1101/2022.12.22.521557}, + publisher = {Cold Spring Harbor Laboratory}, + url = {https://www.biorxiv.org/content/early/2023/04/25/2022.12.22.521557}, + eprint = {https://www.biorxiv.org/content/early/2023/04/25/2022.12.22.521557.full.pdf}, + journal = {bioRxiv} +} + +@article{sikkema2023integrated, + title = {An integrated cell atlas of the lung in health and disease}, + volume = {29}, + issn = {1546-170X}, + url = {http://dx.doi.org/10.1038/s41591-023-02327-2}, + doi = {10.1038/s41591-023-02327-2}, + number = {6}, + journal = {Nature Medicine}, + publisher = {Springer Science and Business Media LLC}, + author = {Sikkema, Lisa and Ramírez-Suástegui, Ciro and Strobl, Daniel C. and Gillett, Tessa E. and Zappia, Luke and Madissoon, Elo and Markov, Nikolay S. and Zaragosi, Laure-Emmanuelle and Ji, Yuge and Ansari, Meshal and Arguel, Marie-Jeanne and Apperloo, Leonie and Banchero, Martin and Bécavin, Christophe and Berg, Marijn and Chichelnitskiy, Evgeny and Chung, Mei-i and Collin, Antoine and Gay, Aurore C. A. and Gote-Schniering, Janine and Hooshiar Kashani, Baharak and Inecik, Kemal and Jain, Manu and Kapellos, Theodore S. and Kole, Tessa M. and Leroy, Sylvie and Mayr, Christoph H. and Oliver, Amanda J. and von Papen, Michael and Peter, Lance and Taylor, Chase J. and Walzthoeni, Thomas and Xu, Chuan and Bui, Linh T. and De Donno, Carlo and Dony, Leander and Faiz, Alen and Guo, Minzhe and Gutierrez, Austin J. and Heumos, Lukas and Huang, Ni and Ibarra, Ignacio L. and Jackson, Nathan D. and Kadur Lakshminarasimha Murthy, Preetish and Lotfollahi, Mohammad and Tabib, Tracy and Talavera-López, Carlos and Travaglini, Kyle J. and Wilbrey-Clark, Anna and Worlock, Kaylee B. and Yoshida, Masahiro and Chen, Yuexin and Hagood, James S. and Agami, Ahmed and Horvath, Peter and Lundeberg, Joakim and Marquette, Charles-Hugo and Pryhuber, Gloria and Samakovlis, Chistos and Sun, Xin and Ware, Lorraine B. and Zhang, Kun and van den Berge, Maarten and Bossé, Yohan and Desai, Tushar J. and Eickelberg, Oliver and Kaminski, Naftali and Krasnow, Mark A. and Lafyatis, Robert and Nikolic, Marko Z. and Powell, Joseph E. and Rajagopal, Jayaraj and Rojas, Mauricio and Rozenblatt-Rosen, Orit and Seibold, Max A. and Sheppard, Dean and Shepherd, Douglas P. and Sin, Don D. and Timens, Wim and Tsankov, Alexander M. and Whitsett, Jeffrey and Xu, Yan and Banovich, Nicholas E. and Barbry, Pascal and Duong, Thu Elizabeth and Falk, Christine S. and Meyer, Kerstin B. and Kropski, Jonathan A. and Pe’er, Dana and Schiller, Herbert B. and Tata, Purushothama Rao and Schultze, Joachim L. and Teichmann, Sara A. and Misharin, Alexander V. and Nawijn, Martijn C. and Luecken, Malte D. and Theis, Fabian J.}, + year = {2023}, + month = jun, + pages = {1563–1577} +} + +@article{consortium2022tabula, + title = {The Tabula Sapiens: A multiple-organ, single-cell transcriptomic atlas of humans}, + volume = {376}, + issn = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abl4896}, + doi = {10.1126/science.abl4896}, + number = {6594}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Jones, Robert C. 
and Karkanias, Jim and Krasnow, Mark A. and Pisco, Angela Oliveira and Quake, Stephen R. and Salzman, Julia and Yosef, Nir and Bulthaup, Bryan and Brown, Phillip and Harper, William and Hemenez, Marisa and Ponnusamy, Ravikumar and Salehi, Ahmad and Sanagavarapu, Bhavani A. and Spallino, Eileen and Aaron, Ksenia A. and Concepcion, Waldo and Gardner, James M. and Kelly, Burnett and Neidlinger, Nikole and Wang, Zifa and Crasta, Sheela and Kolluru, Saroja and Morri, Maurizio and Pisco, Angela Oliveira and Tan, Serena Y. and Travaglini, Kyle J. and Xu, Chenling and Alcántara-Hernández, Marcela and Almanzar, Nicole and Antony, Jane and Beyersdorf, Benjamin and Burhan, Deviana and Calcuttawala, Kruti and Carter, Matthew M. and Chan, Charles K. F. and Chang, Charles A. and Chang, Stephen and Colville, Alex and Crasta, Sheela and Culver, Rebecca N. and Cvijović, Ivana and D’Amato, Gaetano and Ezran, Camille and Galdos, Francisco X. and Gillich, Astrid and Goodyer, William R. and Hang, Yan and Hayashi, Alyssa and Houshdaran, Sahar and Huang, Xianxi and Irwin, Juan C. and Jang, SoRi and Juanico, Julia Vallve and Kershner, Aaron M. and Kim, Soochi and Kiss, Bernhard and Kolluru, Saroja and Kong, William and Kumar, Maya E. and Kuo, Angera H. and Leylek, Rebecca and Li, Baoxiang and Loeb, Gabriel B. and Lu, Wan-Jin and Mantri, Sruthi and Markovic, Maxim and McAlpine, Patrick L. and de Morree, Antoine and Morri, Maurizio and Mrouj, Karim and Mukherjee, Shravani and Muser, Tyler and Neuh\"{o}fer, Patrick and Nguyen, Thi D. and Perez, Kimberly and Phansalkar, Ragini and Pisco, Angela Oliveira and Puluca, Nazan and Qi, Zhen and Rao, Poorvi and Raquer-McKay, Hayley and Schaum, Nicholas and Scott, Bronwyn and Seddighzadeh, Bobak and Segal, Joe and Sen, Sushmita and Sikandar, Shaheen and Spencer, Sean P. and Steffes, Lea C. and Subramaniam, Varun R. and Swarup, Aditi and Swift, Michael and Travaglini, Kyle J. and Van Treuren, Will and Trimm, Emily and Veizades, Stefan and Vijayakumar, Sivakamasundari and Vo, Kim Chi and Vorperian, Sevahn K. and Wang, Wanxin and Weinstein, Hannah N. W. and Winkler, Juliane and Wu, Timothy T. H. and Xie, Jamie and Yung, Andrea R. and Zhang, Yue and Detweiler, Angela M. and Mekonen, Honey and Neff, Norma F. and Sit, Rene V. and Tan, Michelle and Yan, Jia and Bean, Gregory R. and Charu, Vivek and Forgó, Erna and Martin, Brock A. and Ozawa, Michael G. and Silva, Oscar and Tan, Serena Y. and Toland, Angus and Vemuri, Venkata N. P. and Afik, Shaked and Awayan, Kyle and Botvinnik, Olga Borisovna and Byrne, Ashley and Chen, Michelle and Dehghannasiri, Roozbeh and Detweiler, Angela M. and Gayoso, Adam and Granados, Alejandro A. and Li, Qiqing and Mahmoudabadi, Gita and McGeever, Aaron and de Morree, Antoine and Olivieri, Julia Eve and Park, Madeline and Pisco, Angela Oliveira and Ravikumar, Neha and Salzman, Julia and Stanley, Geoff and Swift, Michael and Tan, Michelle and Tan, Weilun and Tarashansky, Alexander J. and Vanheusden, Rohan and Vorperian, Sevahn K. and Wang, Peter and Wang, Sheng and Xing, Galen and Xu, Chenling and Yosef, Nir and Alcántara-Hernández, Marcela and Antony, Jane and Chan, Charles K. F. and Chang, Charles A. and Colville, Alex and Crasta, Sheela and Culver, Rebecca and Dethlefsen, Les and Ezran, Camille and Gillich, Astrid and Hang, Yan and Ho, Po-Yi and Irwin, Juan C. and Jang, SoRi and Kershner, Aaron M. and Kong, William and Kumar, Maya E. and Kuo, Angera H. and Leylek, Rebecca and Liu, Shixuan and Loeb, Gabriel B. and Lu, Wan-Jin and Maltzman, Jonathan S. 
and Metzger, Ross J. and de Morree, Antoine and Neuh\"{o}fer, Patrick and Perez, Kimberly and Phansalkar, Ragini and Qi, Zhen and Rao, Poorvi and Raquer-McKay, Hayley and Sasagawa, Koki and Scott, Bronwyn and Sinha, Rahul and Song, Hanbing and Spencer, Sean P. and Swarup, Aditi and Swift, Michael and Travaglini, Kyle J. and Trimm, Emily and Veizades, Stefan and Vijayakumar, Sivakamasundari and Wang, Bruce and Wang, Wanxin and Winkler, Juliane and Xie, Jamie and Yung, Andrea R. and Artandi, Steven E. and Beachy, Philip A. and Clarke, Michael F. and Giudice, Linda C. and Huang, Franklin W. and Huang, Kerwyn Casey and Idoyaga, Juliana and Kim, Seung K. and Krasnow, Mark and Kuo, Christin S. and Nguyen, Patricia and Quake, Stephen R. and Rando, Thomas A. and Red-Horse, Kristy and Reiter, Jeremy and Relman, David A. and Sonnenburg, Justin L. and Wang, Bruce and Wu, Albert and Wu, Sean M. and Wyss-Coray, Tony}, + year = {2022}, + month = may +} + +@article{dominguez2022crosstissue, + title = {Cross-tissue immune cell analysis reveals tissue-specific features in humans}, + volume = {376}, + issn = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abl5197}, + doi = {10.1126/science.abl5197}, + number = {6594}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Domínguez Conde, C. and Xu, C. and Jarvis, L. B. and Rainbow, D. B. and Wells, S. B. and Gomes, T. and Howlett, S. K. and Suchanek, O. and Polanski, K. and King, H. W. and Mamanova, L. and Huang, N. and Szabo, P. A. and Richardson, L. and Bolt, L. and Fasouli, E. S. and Mahbubani, K. T. and Prete, M. and Tuck, L. and Richoz, N. and Tuong, Z. K. and Campos, L. and Mousa, H. S. and Needham, E. J. and Pritchard, S. and Li, T. and Elmentaite, R. and Park, J. and Rahmani, E. and Chen, D. and Menon, D. K. and Bayraktar, O. A. and James, L. K. and Meyer, K. B. and Yosef, N. and Clatworthy, M. R. and Sims, P. A. and Farber, D. L. and Saeb-Parsy, K. and Jones, J. L. and Teichmann, S. A.}, + year = {2022}, + month = may +} + +@article{eraslan2022singlenucleus, + title = {Single-nucleus cross-tissue molecular reference maps toward understanding disease gene function}, + volume = {376}, + issn = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.abl4290}, + doi = {10.1126/science.abl4290}, + number = {6594}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Eraslan, G\"{o}kcen and Drokhlyansky, Eugene and Anand, Shankara and Fiskin, Evgenij and Subramanian, Ayshwarya and Slyper, Michal and Wang, Jiali and Van Wittenberghe, Nicholas and Rouhana, John M. and Waldman, Julia and Ashenberg, Orr and Lek, Monkol and Dionne, Danielle and Win, Thet Su and Cuoco, Michael S. and Kuksenko, Olena and Tsankov, Alexander M. and Branton, Philip A. and Marshall, Jamie L. and Greka, Anna and Getz, Gad and Segrè, Ayellet V. and Aguet, Fran\c{c}ois and Rozenblatt-Rosen, Orit and Ardlie, Kristin G. 
and Regev, Aviv}, + year = {2022}, + month = may +} + +@article{li2023integrated, + title = {{Integrated multi-omics single cell atlas of the human retina}}, + author = {Li, Jin and Wang, Jun and Ibarra, Ignacio L and Cheng, Xuesen and + Luecken, Malte D and Lu, Jiaxiong and Monavarfeshani, Aboozar and + Yan, Wenjun and Zheng, Yiqiao and Zuo, Zhen and Zayas Colborn, + Samantha Lynn and Cortez, Berenice Sarahi and Owen, Leah A and + Tran, Nicholas M and Shekhar, Karthik and Sanes, Joshua R and + Stout, J Timothy and Chen, Shiming and Li, Yumei and DeAngelis, + Margaret M and Theis, Fabian J and Chen, Rui}, + journal = {bioRxiv}, + month = nov, + year = 2023, + doi = {10.1101/2023.11.07.566105} +} + +@article{wilson2022multimodal, + title = {Multimodal single cell sequencing implicates chromatin accessibility and genetic background in diabetic kidney disease progression}, + volume = {13}, + issn = {2041-1723}, + url = {http://dx.doi.org/10.1038/s41467-022-32972-z}, + doi = {10.1038/s41467-022-32972-z}, + number = {1}, + journal = {Nature Communications}, + publisher = {Springer Science and Business Media LLC}, + author = {Wilson, Parker C. and Muto, Yoshiharu and Wu, Haojia and Karihaloo, Anil and Waikar, Sushrut S. and Humphreys, Benjamin D.}, + year = {2022}, + month = sep +} + +@article{steuernagel2022hypomap, + title = {HypoMap—a unified single-cell gene expression atlas of the murine hypothalamus}, + volume = {4}, + issn = {2522-5812}, + url = {http://dx.doi.org/10.1038/s42255-022-00657-y}, + doi = {10.1038/s42255-022-00657-y}, + number = {10}, + journal = {Nature Metabolism}, + publisher = {Springer Science and Business Media LLC}, + author = {Steuernagel, Lukas and Lam, Brian Y. H. and Klemm, Paul and Dowsett, Georgina K. C. and Bauder, Corinna A. and Tadross, John A. and Hitschfeld, Tamara Sotelo and del Rio Martin, Almudena and Chen, Weiyi and de Solis, Alain J. and Fenselau, Henning and Davidsen, Peter and Cimino, Irene and Kohnke, Sara N. and Rimmington, Debra and Coll, Anthony P. and Beyer, Andreas and Yeo, Giles S. H. and Br\"{u}ning, Jens C.}, + year = {2022}, + month = oct, + pages = {1402–1419} +} + +@article{tian2023singlecell, + title = {Single-cell DNA methylation and 3D genome architecture in the human brain}, + volume = {382}, + issn = {1095-9203}, + url = {http://dx.doi.org/10.1126/science.adf5357}, + doi = {10.1126/science.adf5357}, + number = {6667}, + journal = {Science}, + publisher = {American Association for the Advancement of Science (AAAS)}, + author = {Tian, Wei and Zhou, Jingtian and Bartlett, Anna and Zeng, Qiurui and Liu, Hanqing and Castanon, Rosa G. and Kenworthy, Mia and Altshul, Jordan and Valadon, Cynthia and Aldridge, Andrew and Nery, Joseph R. and Chen, Huaming and Xu, Jiaying and Johnson, Nicholas D. and Lucero, Jacinta and Osteen, Julia K. and Emerson, Nora and Rink, Jon and Lee, Jasper and Li, Yang E. and Siletti, Kimberly and Liem, Michelle and Claffey, Naomi and O’Connor, Carolyn and Yanny, Anna Marie and Nyhus, Julie and Dee, Nick and Casper, Tamara and Shapovalova, Nadiya and Hirschstein, Daniel and Ding, Song-Lin and Hodge, Rebecca and Levi, Boaz P. and Keene, C. Dirk and Linnarsson, Sten and Lein, Ed and Ren, Bing and Behrens, M. 
Margarita and Ecker, Joseph R.}, + year = {2023}, + month = oct +} + +@article{sonrel2023metaanalysis, + title = {Meta-analysis of (single-cell method) benchmarks reveals the need for extensibility and interoperability}, + volume = {24}, + issn = {1474-760X}, + url = {http://dx.doi.org/10.1186/s13059-023-02962-5}, + doi = {10.1186/s13059-023-02962-5}, + number = {1}, + journal = {Genome Biology}, + publisher = {Springer Science and Business Media LLC}, + author = {Sonrel, Anthony and Luetge, Almut and Soneson, Charlotte and Mallona, Izaskun and Germain, Pierre-Luc and Knyazev, Sergey and Gilis, Jeroen and Gerber, Reto and Seurinck, Ruth and Paul, Dominique and Sonder, Emanuel and Crowell, Helena L. and Fanaswala, Imran and Al-Ajami, Ahmad and Heidari, Elyas and Schmeing, Stephan and Milosavljevic, Stefan and Saeys, Yvan and Mangul, Serghei and Robinson, Mark D.}, + year = {2023}, + month = may +} + +@article{saelens2019comparison, + title = {A comparison of single-cell trajectory inference methods}, + volume = {37}, + issn = {1546-1696}, + url = {http://dx.doi.org/10.1038/s41587-019-0071-9}, + doi = {10.1038/s41587-019-0071-9}, + number = {5}, + journal = {Nature Biotechnology}, + publisher = {Springer Science and Business Media LLC}, + author = {Saelens, Wouter and Cannoodt, Robrecht and Todorov, Helena and Saeys, Yvan}, + year = {2019}, + month = apr, + pages = {547–554} +} + +@article{huang2018savergene, + title = {SAVER: gene expression recovery for single-cell RNA sequencing}, + volume = {15}, + issn = {1548-7105}, + url = {http://dx.doi.org/10.1038/s41592-018-0033-z}, + doi = {10.1038/s41592-018-0033-z}, + number = {7}, + journal = {Nature Methods}, + publisher = {Springer Science and Business Media LLC}, + author = {Huang, Mo and Wang, Jingshu and Torre, Eduardo and Dueck, Hannah and Shaffer, Sydney and Bonasio, Roberto and Murray, John I. 
and Raj, Arjun and Li, Mingyao and Zhang, Nancy R.}, + year = {2018}, + month = jun, + pages = {539–542} +} + +@article{chari2023speciousart, + title = {The specious art of single-cell genomics}, + volume = {19}, + issn = {1553-7358}, + url = {http://dx.doi.org/10.1371/journal.pcbi.1011288}, + doi = {10.1371/journal.pcbi.1011288}, + number = {8}, + journal = {PLOS Computational Biology}, + publisher = {Public Library of Science (PLoS)}, + author = {Chari, Tara and Pachter, Lior}, + editor = {Papin, Jason A.}, + year = {2023}, + month = aug, + pages = {e1011288} +} + +@article{szalata2024transformers, + title = {{Transformers in single-cell omics: a review and new perspectives}}, + author = {Szałata, Artur and Hrovatin, Karin and Becker, Sören and Tejada-Lapuerta, Alejandro and Cui, Haotian and Wang, Bo and Theis, Fabian J}, + journal = {Nature methods}, + publisher = {Springer Science and Business Media LLC}, + volume = 21, + number = 8, + pages = {1430--1443}, + month = aug, + year = 2024, + doi = {10.1038/s41592-024-02353-z}, + pmid = 39122952, + issn = {1548-7091,1548-7105}, + language = {en} +} + +@article{boiarsky2023foundationmodels, + title = {{A deep dive into single-cell RNA sequencing foundation models}}, + author = {Boiarsky, Rebecca and Singh, Nalini and Buendia, Alejandro and Getz, Gad and Sontag, David}, + journal = {bioRxiv}, + pages = {2023.10.19.563100}, + month = oct, + year = 2023, + doi = {10.1101/2023.10.19.563100}, + language = {en} +} + +@article{liu2024foundationmodels, + title = {{Evaluating the utilities of foundation models in single-cell data analysis}}, + author = {Liu, Tianyu and Li, Kexing and Wang, Yuge and Li, Hongyu and Zhao, Hongyu}, + journal = {bioRxiv.org: the preprint server for biology}, + pages = {2023.09.08.555192}, + month = aug, + year = 2024, + doi = {10.1101/2023.09.08.555192}, + pmc = {PMC10925156}, + pmid = 38464157, + language = {en} +} diff --git a/src/reporting/shared/functions.R b/src/reporting/shared/functions.R new file mode 100644 index 000000000..42179d5ce --- /dev/null +++ b/src/reporting/shared/functions.R @@ -0,0 +1,95 @@ +#' Read bibliography +#' +#' @param bib_file Path to a bibliography BibTex file +#' +#' @returns A list with two elements `doi` and `bibtex` where the names are +#' reference keys and the values are the corresponding DOIs or BibTeX entries +read_bibliography <- function(bib_file) { + bibentries <- bibtex::read.bib(bib_file) + + dois <- lapply(bibentries, function(.entry) { + if (!is.null(.entry$doi)) { + .entry$doi + } else if (!is.null(.entry$DOI)) { + .entry$DOI + } else { + NULL + } + }) |> + purrr::compact() + + bibtex <- lapply(bibentries, function(.entry) { + format(.entry, "bibtex") + }) + + list( + doi = dois, + bibtex = bibtex + ) +} + +#' Get references list +#' +#' Convert a reference field from a config file into a references list +#' resolving any legacy reference keys +#' +#' @param reference The reference field value +#' @param bibliography A bibliography list as returned by `read_bibliography()` +#' +#' @returns A list with two elements `doi` and `bibtex`, where each is a character vector +#' containing corresponding DOIs or BibTeX entries +get_references_list <- function(reference, bibliography) { + # If null, return empty references + if (is.null(reference)) { + return(list(doi = character(0), bibtex = character(0))) + } + + # If reference is a list, assume it is in the current format + if (is.list(reference)) { + return( + list( + doi = reference$doi %||% character(0), + bibtex = 
reference$bibtex %||% character(0) + ) + ) + } + + # If not a list, check if it is a DOI or BibTeX entry + if (startsWith(reference, "@")) { + return(list(doi = character(0), bibtex = reference)) + } else if (startsWith(reference, "10.")) { + return(list(doi = reference, bibtex = character(0))) + } + + # Otherwise, assume it is a bibliography key + if (reference %in% names(bibliography$doi)) { + return(list(doi = bibliography$doi[[reference]], bibtex = character(0))) + } else if (reference %in% names(bibliography$bibtex)) { + return(list(doi = character(0), bibtex = bibliography$bibtex[[reference]])) + } else { + stop("Reference key '", reference, "' not found in bibliography") + } +} + +#' Get authors list +#' +#' Convert a list of authors from a config file into a structured list +#' +#' @param authors The authors field from a config file +#' +#' @returns An authors list in the expected format +get_authors_list <- function(authors) { + `%||%` <- rlang::`%||%` + + purrr::map(authors, function(.author) { + other_fields <- setdiff(names(.author$info), c("github", "orcid")) + + list( + name = jsonlite::unbox(.author$name), + roles = .author$roles %||% character(0), + github = jsonlite::unbox(.author$info$github), + orcid = jsonlite::unbox(.author$info$orcid), + info = .author$info[other_fields] + ) + }) +}
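+
+# --- Usage sketch (illustrative only; not called by the pipeline) ---
+# A minimal example of how the helpers above fit together. The bibliography
+# path below is hypothetical; "saelens2019comparison" is a key from the
+# project bibliography added in this change. Note that the list branch of
+# get_references_list() relies on `%||%` being available (base R >= 4.4.0,
+# or rlang attached).
+#
+# bib <- read_bibliography("path/to/library.bib")
+#
+# # Legacy bibliography key resolved to its DOI or BibTeX entry
+# get_references_list("saelens2019comparison", bib)
+#
+# # A plain DOI string is passed through as-is
+# get_references_list("10.1038/s41587-019-0071-9", bib)
+#
+# # Authors field from a config, converted to the structured format
+# get_authors_list(list(list(
+#   name = "Ada Example", # hypothetical author
+#   roles = c("author"),
+#   info = list(github = "ada-example", orcid = "0000-0000-0000-0000")
+# )))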