diff --git a/Gemfile b/Gemfile
deleted file mode 100644
index 5b22a72ad15642..00000000000000
--- a/Gemfile
+++ /dev/null
@@ -1,33 +0,0 @@
-# frozen_string_literal: true
-
-# ruby '>= 3.0.0'
-
-source 'https://rubygems.org'
-gem 'addressable'
-gem 'awesome_bot'
-gem 'html-proofer', '> 5.0.0'
-gem 'jekyll'
-gem 'jekyll-feed'
-gem 'jekyll-redirect-from'
-gem 'kwalify'
-gem 'nokogiri', '>= 1.18'
-gem 'pkg-config'
-gem 'webrick' if RUBY_VERSION >= '3.0' # Ruby3 does not ship with webrick and Jekyll needs it, see https://github.com/jekyll/jekyll/issues/8523
-
-# Citations
-gem 'bibtex-ruby'
-gem 'citeproc-ruby'
-gem 'csl-styles'
-
-# Image processing
-gem 'fastimage'
-
-# For our CLI tools
-gem 'commander'
-
-# RO-Crates
-gem 'rubyzip', '~> 2.3.0'
-
-# Documentation
-gem 'rdoc', '~> 6.7'
-gem 'rorvswild_theme_rdoc'
diff --git a/Gemfile.lock b/Gemfile.lock
deleted file mode 100644
index 24ec16c18bd852..00000000000000
--- a/Gemfile.lock
+++ /dev/null
@@ -1,179 +0,0 @@
-GEM
- remote: https://rubygems.org/
- specs:
- Ascii85 (2.0.1)
- addressable (2.8.5)
- public_suffix (>= 2.0.2, < 6.0)
- afm (0.2.2)
- async (2.23.0)
- console (~> 1.29)
- fiber-annotation
- io-event (~> 1.9)
- metrics (~> 0.12)
- traces (~> 0.15)
- awesome_bot (1.20.0)
- parallel (= 1.20.1)
- bibtex-ruby (6.0.0)
- latex-decode (~> 0.0)
- bigdecimal (3.1.9)
- citeproc (1.0.10)
- namae (~> 1.0)
- citeproc-ruby (2.0.0)
- citeproc (~> 1.0, >= 1.0.9)
- csl (~> 2.0)
- colorator (1.1.0)
- commander (4.6.0)
- highline (~> 2.0.0)
- concurrent-ruby (1.2.2)
- console (1.29.2)
- fiber-annotation
- fiber-local (~> 1.1)
- json
- csl (2.0.0)
- namae (~> 1.0)
- rexml
- csl-styles (2.0.1)
- csl (~> 2.0)
- date (3.4.1)
- em-websocket (0.5.3)
- eventmachine (>= 0.12.9)
- http_parser.rb (~> 0)
- ethon (0.16.0)
- ffi (>= 1.15.0)
- eventmachine (1.2.7)
- fastimage (2.2.7)
- ffi (1.16.2)
- fiber-annotation (0.2.0)
- fiber-local (1.1.0)
- fiber-storage
- fiber-storage (1.0.0)
- forwardable-extended (2.6.0)
- google-protobuf (3.25.5-x86_64-darwin)
- google-protobuf (3.25.5-x86_64-linux)
- hashery (2.1.2)
- highline (2.0.3)
- html-proofer (5.0.9)
- addressable (~> 2.3)
- async (~> 2.1)
- nokogiri (~> 1.13)
- pdf-reader (~> 2.11)
- rainbow (~> 3.0)
- typhoeus (~> 1.3)
- yell (~> 2.0)
- zeitwerk (~> 2.5)
- http_parser.rb (0.8.0)
- i18n (1.14.1)
- concurrent-ruby (~> 1.0)
- io-event (1.9.0)
- jekyll (4.3.2)
- addressable (~> 2.4)
- colorator (~> 1.0)
- em-websocket (~> 0.5)
- i18n (~> 1.0)
- jekyll-sass-converter (>= 2.0, < 4.0)
- jekyll-watch (~> 2.0)
- kramdown (~> 2.3, >= 2.3.1)
- kramdown-parser-gfm (~> 1.0)
- liquid (~> 4.0)
- mercenary (>= 0.3.6, < 0.5)
- pathutil (~> 0.9)
- rouge (>= 3.0, < 5.0)
- safe_yaml (~> 1.0)
- terminal-table (>= 1.8, < 4.0)
- webrick (~> 1.7)
- jekyll-feed (0.17.0)
- jekyll (>= 3.7, < 5.0)
- jekyll-redirect-from (0.16.0)
- jekyll (>= 3.3, < 5.0)
- jekyll-sass-converter (3.0.0)
- sass-embedded (~> 1.54)
- jekyll-watch (2.2.1)
- listen (~> 3.0)
- json (2.10.2)
- kramdown (2.4.0)
- rexml
- kramdown-parser-gfm (1.1.0)
- kramdown (~> 2.0)
- kwalify (0.7.2)
- latex-decode (0.4.0)
- liquid (4.0.4)
- listen (3.8.0)
- rb-fsevent (~> 0.10, >= 0.10.3)
- rb-inotify (~> 0.9, >= 0.9.10)
- mercenary (0.4.0)
- metrics (0.12.1)
- namae (1.1.1)
- nokogiri (1.18.9-x86_64-darwin)
- racc (~> 1.4)
- nokogiri (1.18.9-x86_64-linux-gnu)
- racc (~> 1.4)
- parallel (1.20.1)
- pathutil (0.16.2)
- forwardable-extended (~> 2.6)
- pdf-reader (2.14.1)
- Ascii85 (>= 1.0, < 3.0, != 2.0.0)
- afm (~> 0.2.1)
- hashery (~> 2.0)
- ruby-rc4
- ttfunk
- pkg-config (1.5.5)
- psych (5.2.1)
- date
- stringio
- public_suffix (5.0.3)
- racc (1.8.1)
- rainbow (3.1.1)
- rb-fsevent (0.11.2)
- rb-inotify (0.10.1)
- ffi (~> 1.0)
- rdoc (6.9.1)
- psych (>= 4.0.0)
- rexml (3.4.2)
- rorvswild_theme_rdoc (0.2)
- rouge (4.1.3)
- ruby-rc4 (0.1.5)
- rubyzip (2.3.2)
- safe_yaml (1.0.5)
- sass-embedded (1.69.5-x86_64-darwin)
- google-protobuf (~> 3.23)
- sass-embedded (1.69.5-x86_64-linux-gnu)
- google-protobuf (~> 3.23)
- stringio (3.1.2)
- terminal-table (3.0.2)
- unicode-display_width (>= 1.1.1, < 3)
- traces (0.15.2)
- ttfunk (1.8.0)
- bigdecimal (~> 3.1)
- typhoeus (1.4.1)
- ethon (>= 0.9.0)
- unicode-display_width (2.5.0)
- webrick (1.8.2)
- yell (2.2.2)
- zeitwerk (2.7.1)
-
-PLATFORMS
- x86_64-darwin-19
- x86_64-linux
-
-DEPENDENCIES
- addressable
- awesome_bot
- bibtex-ruby
- citeproc-ruby
- commander
- csl-styles
- fastimage
- html-proofer (> 5.0.0)
- jekyll
- jekyll-feed
- jekyll-redirect-from
- kwalify
- nokogiri (>= 1.18)
- pkg-config
- rdoc (~> 6.7)
- rorvswild_theme_rdoc
- rubyzip (~> 2.3.0)
- webrick
-
-BUNDLED WITH
- 2.4.20
diff --git a/Makefile b/Makefile
index f41b6ac3e1e3be..5cafd0b2bbe42b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# Settings
UNAME := $(shell uname)
JEKYLL=jekyll
-PORT?=4000
+PORT?=4001
HOST?=0.0.0.0
FLAGS?=""
ENV?="development"
diff --git a/_layouts/community.html b/_layouts/community.html.bk
similarity index 100%
rename from _layouts/community.html
rename to _layouts/community.html.bk
diff --git a/topics/single-cell/tutorials/scrna-case_basic-pipeline/faqs/findmarkers_empty.md b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/findmarkers_empty.md
similarity index 100%
rename from topics/single-cell/tutorials/scrna-case_basic-pipeline/faqs/findmarkers_empty.md
rename to topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/findmarkers_empty.md
diff --git a/topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/index.md b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/index.md
new file mode 100644
index 00000000000000..9ce3fe4fce824b
--- /dev/null
+++ b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/index.md
@@ -0,0 +1,3 @@
+---
+layout: faq-page
+---
diff --git a/topics/single-cell/tutorials/scrna-case_basic-pipeline/faqs/plotembed_fails.md b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/plotembed_fails.md
similarity index 100%
rename from topics/single-cell/tutorials/scrna-case_basic-pipeline/faqs/plotembed_fails.md
rename to topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/plotembed_fails.md
diff --git a/topics/single-cell/tutorials/scrna-case_basic-pipeline/faqs/plotembed_results_missing.md b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/plotembed_results_missing.md
similarity index 100%
rename from topics/single-cell/tutorials/scrna-case_basic-pipeline/faqs/plotembed_results_missing.md
rename to topics/single-cell/tutorials/spatial_Vizgen_squidpy/faqs/plotembed_results_missing.md
diff --git a/topics/single-cell/tutorials/spatial_Vizgen_squidpy/tutorial.bib b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/tutorial.bib
new file mode 100644
index 00000000000000..c1f135db45603e
--- /dev/null
+++ b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/tutorial.bib
@@ -0,0 +1,90 @@
+@article {Moreno2020.04.08.032698,
+ author = {Moreno, P. and Huang, N. and Manning, J.R. and Mohammed, S. and Solovyev, A. and Polanski, K. and Chazarra, R. and Talavera-Lopez, C. and Doyle, M. and Marnier, G. and Gr{\"u}ning, B. and Rasche, H. and Bacon, W. and Perez-Riverol, Y. and Haeussler, M. and Meyer, K.B. and Teichmann, S. and Papatheodorou, I.},
+ title = {User-friendly, scalable tools and workflows for single-cell analysis},
+ elocation-id = {2020.04.08.032698},
+ year = {2020},
+ doi = {10.1101/2020.04.08.032698},
+ publisher = {Cold Spring Harbor Laboratory},
+ abstract = {Single-cell RNA-Seq (scRNA-Seq) data analysis requires expertise in command-line tools, programming languages and scaling on compute infrastructure. As scRNA-Seq becomes widespread, computational pipelines need to be more accessible, simpler and scalable. We introduce an interactive analysis environment for scRNA-Seq, based on Galaxy, with ~70 functions from major single-cell analysis tools, which can be run on compute clusters, cloud providers or single machines, to bring compute to the data in scRNA-Seq.Competing Interest StatementThe authors have declared no competing interest.},
+ URL = {https://www.biorxiv.org/content/early/2020/04/09/2020.04.08.032698},
+ eprint = {https://www.biorxiv.org/content/early/2020/04/09/2020.04.08.032698.full.pdf},
+ journal = {bioRxiv}
+}
+
+@article{Bacon2018,
+ doi = {10.3389/fimmu.2018.02523},
+ url = {https://doi.org/10.3389/fimmu.2018.02523},
+ year = {2018},
+ month = nov,
+ publisher = {Frontiers Media {SA}},
+ volume = {9},
+ author = {Wendi A. Bacon and Russell S. Hamilton and Ziyi Yu and Jens Kieckbusch and Delia Hawkes and Ada M. Krzak and Chris Abell and Francesco Colucci and D. Stephen Charnock-Jones},
+ title = {Single-Cell Analysis Identifies Thymic Maturation Delay in Growth-Restricted Neonatal Mice},
+ journal = {Frontiers in Immunology}
+}
+
+@article{Satija2015,
+ doi = {10.1038/nbt.3192},
+ url = {https://doi.org/10.1038/nbt.3192},
+ year = {2015},
+ month = apr,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {33},
+ number = {5},
+ pages = {495--502},
+ author = {Rahul Satija and Jeffrey A Farrell and David Gennert and Alexander F Schier and Aviv Regev},
+ title = {Spatial reconstruction of single-cell gene expression data},
+ journal = {Nature Biotechnology}
+}
+
+@article{Wolf2018,
+ doi = {10.1186/s13059-017-1382-0},
+ url = {https://doi.org/10.1186/s13059-017-1382-0},
+ year = {2018},
+ month = feb,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {19},
+ number = {1},
+ author = {F. Alexander Wolf and Philipp Angerer and Fabian J. Theis},
+ title = {{SCANPY}: large-scale single-cell gene expression data analysis},
+ journal = {Genome Biology}
+}
+
+@article{Trapnell2014,
+ doi = {10.1038/nbt.2859},
+ url = {https://doi.org/10.1038/nbt.2859},
+ year = {2014},
+ month = mar,
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {32},
+ number = {4},
+ pages = {381--386},
+ author = {Cole Trapnell and Davide Cacchiarelli and Jonna Grimsby and Prapti Pokharel and Shuqiang Li and Michael Morse and Niall J Lennon and Kenneth J Livak and Tarjei S Mikkelsen and John L Rinn},
+ title = {The dynamics and regulators of cell fate decisions are revealed by pseudotemporal ordering of single cells},
+ journal = {Nature Biotechnology}
+}
+
+@article{McCarthy2017,
+ doi = {10.1093/bioinformatics/btw777},
+ url = {https://doi.org/10.1093/bioinformatics/btw777},
+ year = {2017},
+ month = jan,
+ publisher = {Oxford University Press ({OUP})},
+ pages = {btw777},
+ author = {Davis J. McCarthy and Kieran R. Campbell and Aaron T. L. Lun and Quin F. Wills},
+ title = {Scater: pre-processing, quality control, normalization and visualization of single-cell {RNA}-seq data in R},
+ journal = {Bioinformatics}
+}
+
+@article{Cakir2020,
+ doi = {10.1093/nargab/lqaa052},
+ url = {https://doi.org/10.1093/nargab/lqaa052},
+ year = {2020},
+ month = jul,
+ publisher = {Oxford University Press ({OUP})},
+ volume = {2},
+ number = {3},
+ author = {Batuhan Cakir and Martin Prete and Ni Huang and Stijn van~Dongen and Pinar Pir and Vladimir~Yu Kiselev},
+ title = {Comparison of visualization tools for single-cell {RNAseq} data},
+ journal = {{NAR} Genomics and Bioinformatics}
+}
diff --git a/topics/single-cell/tutorials/spatial_Vizgen_squidpy/tutorial.md b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/tutorial.md
new file mode 100644
index 00000000000000..880e9f4d645c72
--- /dev/null
+++ b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/tutorial.md
@@ -0,0 +1,1186 @@
+---
+layout: tutorial_hands_on
+
+title: Filter, plot and explore spatial data with Scanpy and Squidpy
+priority: 3
+dataset_link: 'https://info.vizgen.com/mouse-brain-map?submissionGuid=a66ccb7f-87cf-4c55-83b9-5a2b6c0c12b9'
+galaxy_link: 'https://usegalaxy.eu/api/datasets/26c75dcccb616ac8be081820ee509554/display?to_ext=h5ad'
+
+answer_histories:
+ - label: "part-wf-squidpy"
+ history: https://usegalaxy.eu/u/mohitnavandar/h/part1-wf-squidpy
+ date: 2025-12-03
+
+input_histories:
+ - label: "part-wf-squidpy"
+ history: https://usegalaxy.eu/u/mohitnavandar/h/part1-wf-squidpy
+
+questions:
+- How can I assess my dataset quality?
+- How do I choose appropriate filtering thresholds and analysis parameters for MERFISH data?
+- How do I generate spatially informed cell clusters and annotate them using gene expression, spatial location, and tissue structure?
+- How do I build and evaluate a neighborhood graph to understand spatial relationships between cells?
+
+objectives:
+- Interpret spatial and non-spatial quality control plots (e.g., transcript counts, cell size, mitochondrial content, spatial density maps) to guide parameter selection and filtering decisions.
+
+- Repeat analysis steps—from matrix preprocessing to neighborhood graph construction and clustering—while adjusting parameters as needed.
+
+- Identify decision points where choices about filtering, normalization, spatial smoothing, neighborhood graph building, or clustering affect biological interpretation.
+
+- Evaluate clustering outputs, spatial embeddings, segmentations, and neighborhood-based statistics to make informed, data-driven decisions.
+
+- Explain why spatial single-cell analysis is iterative: early plots, spatial maps, and neighborhood graphs often reveal segmentation errors, background cells, or parameter issues that require revisiting and refining the analysis.
+
+time_estimation: 3H
+
+key_points:
+- Spatial transcriptomics data is extremely high-dimensional. To analyze MERFISH single-cell data, we must reduce its complexity—thousands of cells across hundreds of genes—using dimensionality-reduction, neighborhood graph construction, and clustering methods.
+
+- Analysis is more subjective than it appears. Extracting meaningful biological insight requires both prior knowledge of the tissue and iterative refinement of filtering thresholds, spatial parameters, and clustering decisions. Multiple rounds of exploration are essential to distinguish true biological structure from technical or spatial artifacts.
+
+requirements:
+-
+ type: "internal"
+ topic_name: single-cell
+ tutorials:
+ - scspatial-vpt
+ -
+tags:
+- paper-replication
+- MIGHTS
+
+
+contributions:
+ authorship:
+ - Mohit Navandar
+ - Pavan Videm
+ - Amirhossein Naghsh Nilchi
+
+ editing:
+ - nomadscientist
+ testing:
+ -
+ infrastructure:
+ - pavanvidem
+ - Nilchia
+
+follow_up_training:
+ -
+ type: "internal"
+ topic_name: single-cell
+ tutorials:
+ - scspatial-xxx
+ - scspatial-xxx
+
+recordings:
+- captioners:
+ - nomadscientist
+ date: '2021-02-15'
+ galaxy_version: '21.01'
+ length: 30M
+ youtube_id: M6iepSJh0EQ
+ speakers:
+ - nomadscientist
+
+---
+
+
+The dataset was processed with spatialdata-io to convert it into the AnnData format.
+
+> What is the data?
+>
+> The dataset includes MERFISH measurements of a 483-gene panel—covering canonical brain cell-type markers, GPCRs, and RTKs. These have been collected from three full coronal slices across three biological replicates. The current data represent one slice from replicate 1.
+>
+> 
+>
+{: .details}
+
+>
+>
+> In this tutorial, we will cover:
+>
+> 1. TOC
+> {:toc}
+>
+{: .agenda}
+
+## Get data
+
+We've provided you with an AnnData object processed by spatialdata-io. You can download the primary data from [Vizgen](https://info.vizgen.com/mouse-brain-map?submissionGuid=a66ccb7f-87cf-4c55-83b9-5a2b6c0c12b9).
+
+
+{% include _includes/cyoa-choices.html option1="Import History on EU server" option2="Zenodo" option3="SCXA" default="Import History on EU server"
+ text="If you're on the EU server, (if your usegalaxy has an **EU** anywhere in the URL), then the quickest way to Get the Data for this tutorial is via importing a history. Otherwise, you can also import from Zenodo - it just might take a moment longer if you're in a live course and everyone is importing the same dataset at the same time! The SCXA is specifically for learners who are focusing on *Reusing public data*, so are not beginners." %}
+
+
+
+> Import History from EU server
+>
+> 1. Import the {% icon galaxy-history-input %} *Input history* by following the link below
+>
+> {% for h in page.input_histories %}
+> [ {{h.label}} Input History]( {{h.history}} )
+> {% endfor %}
+>
+> {% snippet faqs/galaxy/histories_import.md %}
+>
+{: .hands_on}
+
+
+
+
+
+> Import from Zenodo
+>
+> 1. Create a new history for this tutorial
+> 2. Import the AnnData object from [Zenodo]({{ page.zenodo_link }})
+>
+> ```
+> {{ page.zenodo_link }}/files/Batched_Object
+> ```
+>
+> {% snippet faqs/galaxy/datasets_import_via_link.md %}
+>
+> 3. **Rename** {% icon galaxy-pencil %} the datasets `QC_Object`
+> 4. Check that the datatype is `h5ad`
+>
+> {% snippet faqs/galaxy/datasets_change_datatype.md datatype="h5ad" %}
+>
+{: .hands_on}
+
+
+
+
+
+You can also pull the data from publicly available [Single Cell Expression Atlas](https://www.ebi.ac.uk/gxa/sc/home).
+
+> Import from the EBI Single Cell Expression Atlas
+>
+> 1. {% tool [EBI SCXA Data Retrieval](toolshed.g2.bx.psu.edu/repos/ebi-gxa/retrieve_scxa/retrieve_scxa/v0.0.2+galaxy2) %} with the following parameters:
+> - **SC-Atlas experiment accession**: `E-MTAB-6945`
+>
+> 2. {% icon level %}**Follow tutorial to reformat dataset**: This [short tutorial]({% link topics/single-cell/tutorials/EBI-retrieval/tutorial.md %}) will show you how to use this tool and modify the output so that it's compatible with this tutorial and its workflow.
+>
+{: .hands_on}
+
+
+
+# Important tips for easier analysis
+
+{% snippet faqs/galaxy/tutorial_mode.md %}
+
+{% snippet topics/single-cell/faqs/single_cell_omics.md %}
+
+{% snippet faqs/galaxy/analysis_troubleshooting.md sc=true %}
+
+{% snippet faqs/gtn/gtn_example_histories.md %}
+
+
+# Quality control
+
+You have generated an annotated AnnData object from your raw Vizgen MERFISH data. However, the dataset has only undergone an initial, coarse filtering step—meaning it still contains a number of ‘cells’ that are actually background barcodes, debris, or poorly segmented objects. Similarly, some detected transcripts may represent imaging artifacts or genes expressed at such low abundance that downstream statistical analyses will not be reliable. This background noise—both false-positive cells and low-quality gene signals—not only makes it harder to resolve true biological structure, but also increases the computational load during analysis. Our first task is therefore to refine this matrix, removing spurious cells and genes so we can work with cleaner data, extract meaningful biological insight, and perform faster downstream analyses.
+
+## Calculate QC Metrics
+
+To filter the object, we need to calculate some metrics for each cell and gene.
+
+> Compute QC metrics
+>
+> 1. {% tool [Scanpy Inspect and manipulate](https://squidpy.readthedocs.io/en/stable/notebooks/tutorials/tutorial_vizgen.html#calculate-quality-control-metrics) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Batched_Object`
+> - *"Method used for inspecting"*: `Calculate quality control metrics, using 'pp.calculate_qc_metrics'`
+> - *"Name of kind of values in X"*: `counts`
+> - *"The kind of thing the variables are"*: `genes`
+> - *"Keys for boolean columns of `.var` which identify variables you could want to control for"*: `mito`
+>
+> 2. Rename the generated file `QC_Object`
+>
+{: .hands_on}
+
+## Inspect the AnnData Object
+
+What has this tool calculated?
+
+>
+>
+> 1. What information is stored in your AnnData object? For example, the last tool to generate this object counted the mitochondrial associated genes in your matrix. Where is that data stored?
+> 2. While you are figuring that out, how many genes and cells are in your object?
+>
+> > Hint
+> > You want to use the same tool you used in the previous tutorial to examine your AnnData. Sometimes you can get the answers from *peeking* at your {% icon param-file %} AnnData object in your {% icon galaxy-history %} history, but sometimes it's not quite that simple!
+> >
+> > > Inspecting AnnData Objects
+> > >
+> > > 1. {% tool [Inspect AnnData](toolshed.g2.bx.psu.edu/repos/iuc/anndata_inspect/anndata_inspect/0.10.9+galaxy1) %} with the following parameters:
+> > > - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> > > - *"What to inspect?"*: `Key-indexed observations annotation (obs)`
+> > > 2. {% tool [Inspect AnnData](toolshed.g2.bx.psu.edu/repos/iuc/anndata_inspect/anndata_inspect/0.10.9+galaxy1) %} with the following parameters:
+> > > - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> > > - *"What to inspect?"*: `Key-indexed annotation of variables/features (var)`
+> > {: .hands_on}
+> {: .tip}
+>
+> >
+> >
+> > 1. If you examine your AnnData object, you'll find a number of different quality control metrics for:
+> > - *cells*, found in the {% icon param-file %} **Key-indexed observations annotation (obs)** output dataset
+> > - For example, you can find both discrete and log-based metrics for `n_genes` (how many genes are counted in a given cell), and `n_counts` (how many UMIs are counted in a given cell). This distinction between counts/UMIs or genes is because you might count multiple GAPDHs in a single cell. This would be 1 gene but multiple counts, therefore your `n_counts` should be higher than `n_genes` for an individual cell.
+> > - But what about the mitochondria?? You can also find `total_counts_mito`, `log1p_total_counts_mito`, and `pct_counts_mito`, which has been calculated for each cell.
+> > - and *genes*, found in the {% icon param-file %} **Key-indexed annotation of variables/features (var)** output dataset.
+> > - For example, you can find `n_cells_by_counts` (number of cells that gene appears in).
+> >
+> > 2. There are `31670 cells` and `35734 genes` in the matrix.
+> > - You can *peek* at your {% icon param-file %} Anndata Object in your {% icon galaxy-history %} history by selecting it to reveal a drop-down window that has this same information in it.
+> > - The matrix is `31670 x 35734`. This is `n_obs x n_vars`, or rather, `cells x genes`.
+> >
+> {: .solution}
+>
+{: .question}
+
+## Generate QC Plots
+
+We want to filter our cells, but first we need to know what our data looks like. There are a number of subjective choices to make within scRNA-seq analysis, for instance we now need to make our best informed decisions about where to set our thresholds (more on that soon!). We're going to plot our data a few different ways. Different bioinformaticians might prefer to see the data in different ways, and here we are only generating some of the myriad of plots you can use. Ultimately you need to go with what makes the most sense to you.
+
+> Making QC visualisations - Violin Plots
+>
+> 1. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for plotting"*: `Generic: Violin plot, using 'pl.violin'`
+> - *"Keys for accessing variables"*: `Subset of variables in 'adata.var_names' or fields of '.obs'`
+> - *"Keys for accessing variables"*: `log1p_total_counts,log1p_n_genes_by_counts,pct_counts_mito`
+> - *"The key of the observation grouping to consider"*: `genotype`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output `Violin_log_genotype`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for plotting"*: `Generic: Violin plot, using 'pl.violin'`
+> - *"Keys for accessing variables"*: `Subset of variables in 'adata.var_names' or fields of '.obs'`
+> - *"Keys for accessing variables"*: `log1p_total_counts,log1p_n_genes_by_counts,pct_counts_mito`
+> - *"The key of the observation grouping to consider"*: `sex`
+>
+> 4. **Rename** {% icon galaxy-pencil %} output `Violin_log_sex`
+>
+> 5. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for plotting"*: `Generic: Violin plot, using 'pl.violin'`
+> - *"Keys for accessing variables"*: `Subset of variables in 'adata.var_names' or fields of '.obs'`
+> - *"Keys for accessing variables"*: `log1p_total_counts,log1p_n_genes_by_counts,pct_counts_mito`
+> - *"The key of the observation grouping to consider"*: `batch`
+>
+> 6. **Rename** {% icon galaxy-pencil %} output `Violin_log_batch`
+>
+{: .hands_on}
+
+> Making QC visualisations - Scatterplots
+>
+> 1. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for plotting"*: `Generic: Scatter plot along observations or variables axes, using 'pl.scatter'`
+> - *"Plotting tool that computed coordinates"*: `Using coordinates`
+> - *"x coordinate"*: `log1p_total_counts`
+> - *"y coordinate"*: `pct_counts_mito`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output `Scatter_UMIxMito`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for plotting"*: `Generic: Scatter plot along observations or variables axes, using 'pl.scatter'`
+> - *"Plotting tool that computed coordinates"*: `Using coordinates`
+> - *"x coordinate"*: `log1p_n_genes_by_counts`
+> - *"y coordinate"*: `pct_counts_mito`
+>
+> 4. **Rename** {% icon galaxy-pencil %} output `Scatter_GenesxMito`
+>
+> 5. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for plotting"*: `Generic: Scatter plot along observations or variables axes, using 'pl.scatter'`
+> - *"Plotting tool that computed coordinates"*: `Using coordinates`
+> - *"x coordinate"*: `log1p_n_genes_by_counts`
+> - *"y coordinate"*: `log1p_total_counts`
+> - *"Color by"*: `pct_counts_mito`
+>
+> 6. **Rename** {% icon galaxy-pencil %} output `Scatter_GenesxUMI`
+>
+{: .hands_on}
+
+## Interpret the plots
+
+That's a lot of information! Let's attack this in sections and see what questions these plots can help us answer.
+
+{% snippet faqs/galaxy/features_scratchbook.md %}
+
+> Batch Variation
+>
+> Are there differences in sequencing depth across the samples?
+> 1. Which plot(s) addresses this?
+> 2. How do you interpret it?
+>
+> >
+> >
+> > 1. The plot `Violin_log_batch` will have what you're looking for!
+> > ")
+> > 2. Keeping in mind that this is a log scale - which means that small differences can mean large differences - the violin plots probably look pretty similar.
+> > - `N703` and `N707` might be a bit lower on genes and counts (or UMIs), but the differences aren't catastrophic.
+> > - The `pct_counts_mito` looks pretty similar across the batches, so this also looks good.
+> > - Nothing here would cause us to eliminate a sample from our analysis, but if you see a sample looking completely different from the rest, you would need to question why that is and consider eliminating it from your experiment!
+> >
+> {: .solution}
+>
+{: .question}
+
+> Biological Variables
+>
+> Are there differences in sequencing depth across sex? Genotype?
+> 1. Which plot(s) addresses this?
+> 2. How do you interpret the `sex` differences?
+> 3. How do you interpret the `genotype` differences?
+>
+> >
+> >
+> > 1. Similar to above, the plots `Violin_log_sex` and `Violin_log_genotype` will have what you're looking for!
+> > ")
+> > ")
+> > 2. There isn’t a major difference in sequencing depth across sex, I would say - though you are welcome to disagree!
+> > - It is clear there are far fewer female cells, which makes sense given that only one sample was female. *Note - that was an unfortunate discovery made long after generating libraries. It's quite hard to identify the sex of a neonate in the lab! In practice, try hard to not let such a confounding factor into your data! You could consider re-running all the following analysis without that female sample, if you wish.*
+> > 3. In `Violin_log_genotype`, however, we can see there is a difference. The `knockout` samples clearly have fewer genes and counts. From an experimental point of view, we can consider, does this make sense?
+> > - Would we biologically expect that those cells would be smaller or having fewer transcripts? Possibly, in this case, given that these cells were generated by growth restricted neonatal mice, and in which case we don’t need to worry about our good data, but rather keep this in mind when generating clusters, as we don’t want depth to define clusters, we want biology to!
+> > - On the other hand, it may be that those cells didn’t survive dissociation as well as the healthy ones (in which case we’d expect higher mitochondrial-associated genes, which we don’t see, so we can rule that out!).
+> > - Maybe we unluckily poorly prepared libraries for specifically those knockout samples. There are only three, so maybe those samples are under-sequenced.
+> > - So what do we do about all of this?
+> > - Ideally, we consider re-sequencing all the samples but with a higher concentration of the knockout samples in the library. Any bioinformatician will tell you that the best way to get clean data is in the lab, not the computer! Sadly, absolute best practice isn’t necessarily always a realistic option in the lab - for instance, that mouse line was long gone! - so sometimes, we have to make the best of it. There are options to try and address such discrepancy in sequencing depth. Thus, we’re going to take these samples forward and see if we can find biological insight despite the technical differences.
+> >
+> {: .solution}
+>
+{: .question}
+
+Now that we've assessed the differences in our samples, we will look at the libraries overall to identify appropriate thresholds for our analysis.
+
+> Filter Thresholds: genes
+>
+> What threshold should you set for `log1p_n_genes_by_counts`?
+> 1. Which plot(s) addresses this?
+> 2. What number would you pick?
+>
+> >
+> >
+> > 1. Any plot with `log1p_n_genes_by_counts` would do here, actually! Some people prefer scatterplots to violins.
+> > ")
+> >
+> > 2. In `Scatter_GenesxMito` you can see how cells with `log1p_n_genes_by_counts` up to around, perhaps, `5.7` (around 300 genes) often have high `pct_counts_mito`.
+> > - You can plot this as just `n_genes_by_counts` and see this same trend at around 300 genes, but with this data the log format is clearer so that's how we're presenting it.
+> > - You could also use the violin plots to come up with the threshold, and thus also take batch into account. It's good to look at the violins as well, because you don't want to accidentally cut out an entire sample (i.e. N703 and N707).
+> > - Some bioinformaticians would recommend filtering each sample individually, but this is difficult in larger scale and in this case (you're welcome to give it a go! You'd have to filter separately and then concatenate), it won't make a notable difference in the final interpretation.
+> >
+> {: .solution}
+{: .question}
+
+> Filter Thresholds: UMIs
+>
+> What threshold should you set for `log1p_total_counts`?
+> 1. Which plot(s) addresses this?
+> 2. What number would you pick?
+>
+> >
+> >
+> > 1. As before, any plot with `log1p_total_counts` will do! Again, we'll use a scatterplot here, but you can use a violin plot if you wish!
+> > ")
+> >
+> > 2. We can see that we will need to set a higher threshold (which makes sense, as you'd expect more UMIs per cell than unique genes!). Again, perhaps being a bit aggressive in our threshold, we might choose `6.3`, for instance (which amounts to around 500 counts/cell).
+> > - In an ideal world, you'll see a clear population of real cells separated from a clear population of debris. Many samples, like this one, are under-sequenced, and such separation would likely be seen after deeper sequencing!
+> >
+> {: .solution}
+{: .question}
+
+> Filter Thresholds: mito
+>
+> What threshold should you set for `pct_counts_mito`?
+> 1. Which plot(s) addresses this?
+> 2. What number would you pick?
+>
+> >
+> >
+> > 1. Any plot with `pct_counts_mito` would do here, however the scatterplots are likely the easiest to interpret. We'll use the same as last time.
+> > ")
+> >
+> > 2. We can see a clear trend wherein cells that have around 5% mito counts or higher also have far fewer total counts. These cells are low quality, will muddy our data, and are likely stressed or ruptured prior to encapsulation in a droplet. While 5% is quite a common cut-off, this is quite messy data, so just for kicks we'll go more aggressive with a `4.5%`.
+> > - In general, you must adapt all cut-offs to your data - metabolically active cells might have higher mitochondrial RNA in general, and you don't want to lose a cell population because of a cut-off.
+> >
+> {: .solution}
+{: .question}
+
+## Apply the thresholds
+
+It's now time to apply these thresholds to our data! First, a reminder of how many cells and genes are in your object: `31670 cells` and `35734 genes`. Let's see how that changes each time!
+
+> Working in a group? Decision-time!
+> If you are working in a group, you can now divide up a decision here with one *control* and the rest varied numbers so that you can compare results throughout the tutorials.
+> - Control
+> - **log1p_n_genes_by_counts** > `5.7`
+> - **log1p_total_counts** > `6.3`
+> - **pct_counts_mito** < `4.5%`
+> - Everyone else: Choose your own thresholds and compare results!
+{: .details}
+
+### Genes/cell
+
+> Filter cells by log1p_n_genes_by_counts
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `QC_Object`
+> - *"Method used for filtering"*: `Filter on any column of observations or variables`
+> - *"What to filter?"*: `Observations (obs)`
+> - *"Type of filtering?"*: `By key (column) values`
+> - *"Key to filter"*: `log1p_n_genes_by_counts`
+> - *"Type of value to filter"*: `Number`
+> - *"Filter"*: `greater than`
+> - *"Value"*: `5.7`
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata_out` (output of **Scanpy filter** {% icon tool %})
+> - *"Method used for filtering"*: `Filter on any column of observations or variables`
+> - *"What to filter?"*: `Observations (obs)`
+> - *"Type of filtering?"*: `By key (column) values`
+> - *"Key to filter"*: `log1p_n_genes_by_counts`
+> - *"Type of value to filter"*: `Number`
+> - *"Filter"*: `less than`
+> - *"Value"*: `20.0`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output as `Genes_Filtered_Object`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Genes_Filtered_Object`
+> - *"Method used for plotting"*: `Generic: Violin plot, using 'pl.violin'`
+> - *"Keys for accessing variables"*: `Subset of variables in 'adata.var_names' or fields of '.obs'`
+> - *"Keys for accessing variables"*: `log1p_total_counts,log1p_n_genes_by_counts,pct_counts_mito`
+> - *"The key of the observation grouping to consider"*: `genotype`
+>
+> 4. **Rename** {% icon galaxy-pencil %} output `Violin_log_genotype-Genes`
+>
+{: .hands_on}
+
+>
+>
+> 1. Interpret the violin plot
+> 2. How many genes & cells do you have in your object now?
+>
+> >
+> >
+> > 
+> > 
+> > 1. The only part that seems to change is the `log1p_n_genes_by_counts`. You can see a flatter bottom to the violin plot - this is the lower threshold set. Ideally, this would create a beautiful violin plot because there would be a clear population of low-gene number cells. Sadly not the case here, but still a reasonable filter.
+> > 2. If you *peek* at the AnnData object in your {% icon galaxy-history %}, you will find that you now have `17,104 cells x 35,734 genes`.
+> >
+> {: .solution}
+>
+{: .question}
+
+### UMIs/cell
+
+> Filter cells by log1p_total_counts
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Genes_Filtered_Object`
+> - *"Method used for filtering"*: `Filter on any column of observations or variables`
+> - *"What to filter?"*: `Observations (obs)`
+> - *"Type of filtering?"*: `By key (column) values`
+> - *"Key to filter"*: `log1p_total_counts`
+> - *"Type of value to filter"*: `Number`
+> - *"Filter"*: `greater than`
+> - *"Value"*: `6.3`
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata_out` (output of **Scanpy filter** {% icon tool %})
+> - *"Method used for filtering"*: `Filter on any column of observations or variables`
+> - *"What to filter?"*: `Observations (obs)`
+> - *"Type of filtering?"*: `By key (column) values`
+> - *"Key to filter"*: `log1p_total_counts`
+> - *"Type of value to filter"*: `Number`
+> - *"Filter"*: `less than`
+> - *"Value"*: `20.0`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output as `UMIs_Filtered_Object`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `UMIs_Filtered_Object`
+> - *"Method used for plotting"*: `Generic: Violin plot, using 'pl.violin'`
+> - *"Keys for accessing variables"*: `Subset of variables in 'adata.var_names' or fields of '.obs'`
+> - *"Keys for accessing variables"*: `log1p_total_counts,log1p_n_genes_by_counts,pct_counts_mito`
+> - *"The key of the observation grouping to consider"*: `genotype`
+>
+> 4. **Rename** {% icon galaxy-pencil %} output `Violin_log_genotype-UMIs`
+>
+{: .hands_on}
+
+>
+>
+> 1. Interpret the violin plot
+> 2. How many genes & cells do you have in your object now?
+>
+> >
+> >
+> > 
+> > 
+> > 1. We will focus on the `log1p_total_counts` as that shows the biggest change. Similar to above, the bottom of the violin shape has flattened due to the threshold.
+> > 2. You now have `8,677 cells x 35,734 genes` in the AnnData object.
+> >
+> {: .solution}
+>
+{: .question}
+
+### % Mito/cell
+
+> Filter cells by pct_counts_mito
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `UMIs_Filtered_Object`
+> - *"Method used for filtering"*: `Filter on any column of observations or variables`
+> - *"What to filter?"*: `Observations (obs)`
+> - *"Type of filtering?"*: `By key (column) values`
+> - *"Key to filter"*: `pct_counts_mito`
+> - *"Type of value to filter"*: `Number`
+> - *"Filter"*: `less than`
+> - *"Value"*: `4.5`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output as `Mito_Filtered_Object`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Mito_Filtered_Object`
+> - *"Method used for plotting"*: `Generic: Violin plot, using 'pl.violin'`
+> - *"Keys for accessing variables"*: `Subset of variables in 'adata.var_names' or fields of '.obs'`
+> - *"Keys for accessing variables"*: `log1p_total_counts,log1p_n_genes_by_counts,pct_counts_mito`
+> - *"The key of the observation grouping to consider"*: `genotype`
+>
+> 4. **Rename** {% icon galaxy-pencil %} output `Violin_log_genotype-Mito`
+>
+{: .hands_on}
+
+>
+>
+> 1. Interpret the violin plot
+> 2. How many genes & cells do you have in your object now?
+>
+> >
+> >
+> > 
+> > 
+> > 1. If we carefully check the axes, we can see that the `pct_counts_mito` has shrunk.
+> > 2. Your object now has `8,604 cells x 35,734 genes`.
+> >
+> {: .solution}
+>
+{: .question}
+
+> See every step of filtering together
+> 
+> 
+> 
+> 
+>
+{: .details}
+
+Fantastic work! However, you've now removed a whole heap of cells, and since the captured genes are sporadic (i.e. a small percentage of the overall transcriptome per cell) this means there are a number of genes in your matrix that are currently not in any of the remaining cells. Genes that do not appear in any cell, or even in only 1 or 2 cells, will make some analytical tools break and overall will not be biologically informative. So let's remove them! Note that `3` is not necessarily the best number, rather it is a fairly conservative threshold. You could go as high as 10 or more.
+
+> Working in a group? Decision-time!
+> If you are working in a group, you can now divide up a decision here with one *control* and the rest varied numbers so that you can compare results throughout the tutorials.
+> - Variable: **n_cells**
+> - Control > `3`
+> - Everyone else: Choose your own thresholds and compare results! Note if you go less than 3 (or even remove this step entirely), future tools are likely to fail due to empty gene data.
+{: .details}
+
+### Cells/gene
+
+> Filter genes
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Mito_Filtered_Object`
+> - *"Method used for filtering"*: `Filter on any column of observations or variables`
+> - *"Type of filtering?"*: `By key (column) values`
+> - *"Key to filter"*: `n_cells_by_counts`
+> - *"Type of value to filter"*: `Number`
+> - *"Filter"*: `greater than`
+> - *"Value"*: `3.0`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output as `Cells_Filtered_Object`
+>
+{: .hands_on}
+
+In practice, you'll likely choose your thresholds then set up all these filters to run without checking plots in between each one. But it's nice to see how they work!
+
+We can summarise the results of our filtering:
+
+| | Cells | Genes |
+|------ |------- |------- |
+| Raw | 31670 | 35734 |
+| Filter genes/cell | 17104 | 35734 |
+| Filter UMIs/cell | 8677 | 35734 |
+| Filter mito/cell | 8604 | 35734 |
+| Filter cells/gene | 8604 | 15950 |
+
+{% icon congratulations %} Congratulations! You have filtered your object! Now it should be a lot faster to analyse and easier to interpret.
+
+# Processing
+
+So currently, you have a matrix that is 8604 cells by 15950 genes. This is still quite big data. We have two issues here - firstly, you already know there are differences in how many transcripts and genes have been counted per cell. This technical variable can obscure biological differences. Secondly, we like to plot things on x/y plots, so for instance *Gapdh* could be on one axis, and *Actin* can be on another, and you plot cells on that 2-dimensional axis based on how many of each transcript they possess. While that would be fine, adding in a 3rd dimension (or, indeed, in this case, 15950 more dimensions), is a bit trickier! So our next steps are to transform our big data object into something that is easy to analyse and easy to visualise.
+
+> Normalisation
+>
+> 1. {% tool [Scanpy normalize](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_normalize/scanpy_normalize/1.10.2+galaxy0) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Cells_Filtered_Object`
+> - *"Method used for normalization"*: `Normalize counts per cell, using 'pp.normalize_total'`
+> - *"Target sum"*: `10000.0`
+> - *"Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell"*: `No`
+> - *"Name of the field in 'adata.obs' where the normalization factor is stored"*: `norm`
+>
+> 2. {% tool [Scanpy Inspect and manipulate](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy1) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata_out` (output of **Scanpy normalize** {% icon tool %})
+> - *"Method used for inspecting"*: `Logarithmize the data matrix, using 'pp.log1p'`
+>
+> 3. {% tool [Manipulate AnnData](toolshed.g2.bx.psu.edu/repos/iuc/anndata_manipulate/anndata_manipulate/0.10.9+galaxy1) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata_out` (output of **Scanpy Inspect and manipulate** {% icon tool %})
+> - *"Function to manipulate the object"*: `Freeze the current state into the 'raw' attribute`
+{: .hands_on}
+
+Normalisation helps reduce the differences between gene and UMI counts by fitting total counts to 10,000 per cell. The subsequent log-transform (by log(count+1)) aligns the gene expression level better with a normal distribution. This is fairly standard to prepare for any future dimensionality reduction. Finally, we freeze this information in the 'raw' attribute before we further manipulate the values.
+
+We next need to look at reducing our gene dimensions. We have loads of genes, but not all of them are different from cell to cell. For instance, housekeeping genes are defined as not changing much from cell to cell, so we could remove these from our data to simplify the dataset. We will flag genes that vary across the cells for future analysis.
+
+> Find variable genes
+>
+> 1. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata` (output of **Manipulate AnnData** {% icon tool %})
+> - *"Method used for filtering"*: `Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'`
+> - *"Choose the flavor for identifying highly variable genes"*: `Seurat`
+>
+{: .hands_on}
+
+> More details on the Highly Variable Genes
+>
+> Would you like to know how *many* genes were flagged as **Highly variable genes**?
+>
+>
+> > Find the number of variable genes
+> >
+> >
+> > 1. Select {% icon galaxy-refresh %} **Run Job Again** on the `anndata_out` (output of **Scanpy filter** {% icon tool %}) in your {% icon galaxy-history %} history
+> >
+> > 2. {% tool [Scanpy filter](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3) %} with the following parameters:
+> > - {% icon param-file %} *"Annotated data matrix"*: `anndata` (output of **Manipulate AnnData** {% icon tool %})
+> > - *"Method used for filtering"*: `Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'`
+> > - *"Choose the flavor for identifying highly variable genes"*: `Seurat`
+> > - *"Inplace subset to highly-variable genes?"*: {% icon galaxy-toggle %} **Yes**
+> >
+> {: .hands_on}
+>
+>
+> If you peek at the output, you will see that the number of *genes* in your AnnData object has drastically reduced to around `3216` - this dataset has *only* the highly variable genes! Some people prefer to only perform analysis on this dataset, however I have found that sometimes (for various reasons) important biological marker genes get excluded. For this reason, I personally will flag highly variable genes for use in the next analytical steps, however I keep all the genes in my AnnData object so that I can check for key ones in the future.
+>
+> - {% icon warning %} For this tutorial, you **must** keep all genes in your AnnData object. Therefore, delete the output that contains *only* the highly variable genes from your {% icon galaxy-history %} history now.
+>
+>
+{: .details}
+
+Next up, we're going to scale our data so that all genes have the same variance and a zero mean. This is important to set up our data for further dimensionality reduction. It also helps negate sequencing depth differences between samples, since the gene levels across the cells become comparable. Note, that the differences from scaling etc. are not the values you have at the end - i.e. if your cell has average GAPDH levels, it will not appear as a '0' when you calculate gene differences between clusters.
+
+> Scale data
+>
+> 1. {% tool [Scanpy Inspect and manipulate](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy1) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata_out` (output of **Scanpy filter** {% icon tool %})
+> - *"Method used for inspecting"*: `Scale data to unit variance and zero mean, using 'pp.scale'`
+> - *"Maximum value"*: `10.0`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output `Scaled_Object`
+>
+{: .hands_on}
+
+{% icon congratulations %} Congratulations! You have processed your object!
+
+> More details on batch correction and removing unwanted variation
+>
+> At this point, you might want to remove or regress out the effects of unwanted variation on our data. A common example of this is the cell cycle, which can affect which genes are expressed and how much material is present in our cells. If you’re interested in learning how to do this, then you can move over to the {% icon level %} [Removing the Effects of the Cell Cycle]({% link topics/single-cell/tutorials/scrna-case_cell-cycle/tutorial.md %}) tutorial now and return here to complete your analysis.
+>
+> - {% icon warning %} If you are in a *live course*, the time to do this *bonus tutorial* will not be factored into the schedule. Please instead return to this *after* your course is finished, or if you finish early!
+>
+{: .details}
+
+# Preparing coordinates
+
+We still have too many dimensions. Transcript changes are not usually singular - which is to say, genes are in pathways and in groups. It would be easier to analyse our data if we could more easily group these changes.
+
+## Principal components
+Principal components are calculated from highly dimensional data to find the most spread in the dataset. Given that our object has around `3216` highly variable genes, that's 3216 dimensions. There will, however, be one line/axis/dimension that yields the most spread and variation across the cells. That will be our first principal component. We can calculate the first `x` principal components in our data to drastically reduce the number of dimensions.
+
+> Check the size of your AnnData object!
+> Your AnnData object should have far more than 3216 genes in it (if you followed our settings and tool versions, you'll have a matrix around 8604 × 15950 (cells x genes)). If you followed the *More details on the Highly Variable Genes* box above, you may have created an object with *only* the highly variable genes. Please delete this object and do not use it! Carry forward the object with around 8604 × 15950 (cells x genes)!
+{: .warning}
+
+> Calculate Principal Components
+>
+> 1. {% tool [Scanpy cluster, embed](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Scaled_Object`
+> - *"Method used"*: `Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using 'pp.pca'`
+> - *"Type of PCA?"*: `Full PCA`
+> - *"Change to use different initial states for the optimization"*: `1`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output `PCA_Object`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `PCA_Object`
+> - *"Method used for plotting"*: `PCA: Scatter plot in PCA coordinates, using 'pl.pca_variance_ratio'`
+> - *"Number of PCs to show"*: `50`
+>
+> 4. **Rename** {% icon galaxy-pencil %} plot output `PCA_Variance_Plot`
+>
+{: .hands_on}
+
+Why 50 principal components, you ask? Well, we're pretty confident 50 is an over-estimate. Examine the `PCA_Variance_Plot`.
+
+
+
+We can see that there is really not much variation explained past component 19. So we might save ourselves a great deal of time and muddied data by focusing on the top `20` PCs. (You could probably even go as low as 10!)
+
+## Neighborhood graph
+
+We're still looking at around 20 dimensions at this point in our analysis. We need to identify how similar a cell is to another cell, across every cell across these dimensions. For this, we will use the k-nearest neighbor (kNN) graph, to identify which cells are close together and which are not. The kNN graph plots connections between cells if their distance (when plotted in this 20 dimensional space!) is amongst the k-th smallest distances from that cell to other cells. This will be crucial for identifying clusters, and is necessary for plotting a UMAP. From [UMAP developers](https://github.com/lmcinnes/umap): "Larger neighbor values will result in more global structure being preserved at the loss of detailed local structure. In general this parameter should often be in the range 5 to 50, with a choice of 10 to 15 being a sensible default".
+
+> Working in a group? Decision-time!
+> If you are working in a group, you can now divide up a decision here with one *control* and the rest varied numbers so that you can compare results throughout the tutorials.
+> - Control
+> - **Number of PCs to use** = `20`
+> - **Maximum number of neighbours used** = `15`
+> - Everyone else: Use the PC variance plot to pick your own PC number, and choose your own neighbour maximum as well!
+{: .details}
+
+> ComputeGraph
+>
+> 1. {% tool [Scanpy Inspect and manipulate](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy1) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `PCA_Object`
+> - *"Method used for inspecting"*: `Compute a neighborhood graph of observations, using 'pp.neighbors'`
+> - *"Number of PCs to use"*: `20`
+> - *"Use the indicated representation"*: `X_pca`
+> - *"Numpy random seed"*: `1`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output `Neighbours_Object`
+>
+{: .hands_on}
+
+## Dimensionality reduction for visualisation
+
+Two major visualisations for this data are tSNE and UMAP. We must calculate the coordinates for both prior to visualisation. For tSNE, the parameter [**perplexity**](https://www.nature.com/articles/s41467-019-13056-x) can be changed to best represent the data, while for UMAP the main change would be to change the kNN graph above itself, by changing the **neighbours**.
+
+> Working in a group? Decision-time!
+> If you are working in a group, you can now divide up a decision here with one *control* and the rest varied numbers so that you can compare results throughout the tutorials.
+> - Control
+> - **Perplexity** = `30`
+> - Everyone else: Choose your own perplexity, between 5 and 50!
+{: .details}
+
+> Calculating tSNE & UMAP
+>
+> 1. {% tool [Scanpy cluster, embed](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Neighbours_Object`
+> - *"Method used"*: `t-distributed stochastic neighborhood embedding (tSNE), using 'tl.tsne'`
+> - *"Number of PCs to use"*: `20`
+> - *"Use the indicated representation"*: `X_pca`
+> - *"Random state"*: `1`
+>
+> 2. {% tool [Scanpy cluster, embed](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `anndata_out` (output of **Scanpy cluster, embed** {% icon tool %})
+> - *"Method used"*: `Embed the neighborhood graph using UMAP, using 'tl.umap'`
+> - *"Seed used by the random number generator"*: `1`
+>
+> 3. **Rename** {% icon galaxy-pencil %} output `UMAP_Object`
+>
+{: .hands_on}
+
+{% icon congratulations %} Congratulations! You have prepared your object and created neighborhood coordinates. We can now use those to call some clusters!
+
+# Cell clusters & gene markers
+
+>
+>
+> Let's take a step back here. What is it, exactly, that you are trying to get from your data? What do you want to visualise, and what information do you need from your data to gain insight?
+>
+> >
+> >
+> > Really we need two things - firstly, we need to make sure our experiment was set up well. This is to say, our biological replicates should overlap and our variables should, ideally, show some difference. Secondly, we want insight - we want to know which cell types are in our data, which genes drive those cell types, and in this case, how they might be affected by our biological variable of growth restriction. How does this affect the developing cells, and what genes drive this? So let's add in information about cell clusters and gene markers!
+> >
+> {: .solution}
+>
+{: .question}
+
+Finally, let's identify clusters! Unfortunately, it's not as majestic as biologists often think - the maths doesn't necessarily identify true cell clusters. Every algorithm for identifying cell clusters falls short of a biologist knowing their data, knowing what cells should be there, and proving it in the lab. Sigh. So, we're going to make the best of it as a starting point and see what happens! We will define clusters from the kNN graph, based on how many connections cells have with one another. Roughly, this will depend on a **resolution** parameter for how granular you want to be.
+
+> Working in a group? Decision-time!
+> Oh yes, yet another decision! Single cell analysis is sadly not straightforward.
+> - Control
+> - **Resolution, high value for more and smaller clusters** = `0.5`
+> - **Clustering algorithm** = `Louvain`
+> - Everyone else: Pick your own number. If it helps, this sample should have a lot of very similar cells in it. It contains developing T-cells, so you aren't expecting massive differences between cells, like you would in, say, an entire embryo, with all sorts of unrelated cell types.
+> - Everyone else: Consider the newer **Leiden** clustering method. Note that in future parameters, you will likely need to specify 'leiden' rather than 'louvain', which is the default, if you choose this clustering method.
+{: .details}
+
+> FindClusters
+>
+> 1. {% tool [Scanpy cluster, embed](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `UMAP_Object`
+> - *"Method used"*: `Cluster cells into subgroups, using 'tl.louvain'`
+> - *"Flavor for the clustering"*: `vtraag (much more powerful than igraph)`
+> - *"Resolution"*: `0.5`
+> - *"Random state"*: `1`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output `Clustered_Object`
+>
+{: .hands_on}
+
+Nearly plotting time! But one final piece is to add in SOME gene information. Let's focus on genes that distinguish the clusters.
+
+## Find Gene Markers
+
+> Find Gene markers for each cluster
+>
+> 1. {% tool [Scanpy Inspect and manipulate](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy1) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Clustered_Object`
+> - *"Method used for inspecting"*: `Rank genes for characterizing groups, using 'tl.rank_genes_groups'`
+> - *"Get ranked genes as a Tabular file?"*: `True`
+> - *"Column name in [.var] DataFrame that stores gene symbols."*: `Symbol`
+> - *"The key of the observations grouping to consider"*: `louvain`
+> - *"Use 'raw' attribute of input if present"*: `Yes`
+> - *"Comparison"*: `Compare each group to the union of the rest of the group`
+> - *"The number of genes that appear in the returned tables"*: `100`
+> - *"Method"*: `t-test with overestimate of variance of each group`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output table (not h5ad) `Ranked_Genes-by_cluster`
+>
+> 3. **Rename** {% icon galaxy-pencil %} output h5ad file `DEG_Object`
+{: .hands_on}
+
+> What about comparing across genotypes?
+>
+> Given that we are also interested in differences across genotype, we can also use the find markers function to check that (or any other **Obs** metadata)... roughly.
+>
+> > Comparing across genotypes
+> >
+> > 1. {% tool [Scanpy Inspect and manipulate](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy1) %} with the following parameters:
+> > - {% icon param-file %} *"Annotated data matrix"*: `Clustered_Object`
+> > - *"Method used for inspecting"*: `Rank genes for characterizing groups, using 'tl.rank_genes_groups'`
+> > - *"Get ranked genes as a Tabular file?"*: `True`
+> > - *"Column name in [.var] DataFrame that stores gene symbols."*: `Symbol`
+> > - *"The key of the observations grouping to consider"*: `genotype`
+> > - *"Use 'raw' attribute of input if present"*: `Yes`
+> > - *"Comparison"*: `Compare each group to the union of the rest of the group`
+> > - *"The number of genes that appear in the returned tables"*: `100`
+> > - *"Method"*: `t-test with overestimate of variance of each group`
+> >
+> > 2. **Rename** {% icon galaxy-pencil %} output table (not h5ad) `Ranked_Genes-by_Genotype`
+> >
+> > Do not **Rename** the output AnnData object (in fact, you can delete it!). You have the genotype marker table to enjoy, but we want to keep the cluster comparisons, rather than genotype comparisons, stored in the AnnData object for later.
+> >
+> {: .hands_on}
+>
+> However, this analysis only gives you a rough idea. It is more statistically accurate to convert each cluster into a *pseudobulk* sample, and analyse those. You can find more details about that in our {% icon level %} [pseudobulk tutorial]({% link topics/single-cell/tutorials/pseudobulk-analysis/tutorial.md %}).
+>
+> - {% icon warning %} If you are in a *live course*, the time to do this *bonus tutorial* will not be factored into the schedule. Please instead return to this *after* your course is finished, or if you finish early!
+>
+{: .details}
+
+{% icon congratulations %} Well done! You have cool tables of genes. It's now time for the best bit, the plotting!
+
+# Plotting!
+
+It's time! Let's plot it all!
+But first, let's pick some known marker genes that can distinguish different cell types. I'll be honest, in practice, you'd now be spending a lot of time looking up what each gene does (thank you Google!). There are burgeoning automated-annotation tools, however, so long as you have a good reference (a well annotated dataset that you'll use as the ideal). In the meantime, let's do this the old-fashioned way, and just copy a bunch of the markers in the original paper.
+
+> Plot the cells!
+>
+> 1. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `DEG_Object`
+> - *"Method used for plotting"*: `Embeddings: Scatter plot in tSNE basis, using 'pl.tsne'`
+> - *"Keys for annotations of observations/cells or variables/genes"*: `louvain,sex,batch,genotype,Il2ra,Cd8b1,Cd8a,Cd4,Itm2a,Aif1,log1p_total_counts`
+> - *"Key for field in '.var' that stores gene symbols"*: `Symbol`
+>
+> 2. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `DEG_Object`
+> - *"Method used for plotting"*: `PCA: Scatter plot in PCA coordinates, using 'pl.pca'`
+> - *"Keys for annotations of observations/cells or variables/genes"*: `louvain,sex,batch,genotype,Il2ra,Cd8b1,Cd8a,Cd4,Itm2a,Aif1,log1p_total_counts`
+> - *"Key for field in '.var' that stores gene symbols"*: `Symbol`
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `DEG_Object`
+> - *"Method used for plotting"*: `Embeddings: Scatter plot in UMAP basis, using 'pl.umap'`
+> - *"Keys for annotations of observations/cells or variables/genes"*: `louvain,sex,batch,genotype,Il2ra,Cd8b1,Cd8a,Cd4,Itm2a,Aif1,log1p_total_counts`
+> - *"Key for field in '.var' that stores gene symbols"*: `Symbol`
+>
+{: .hands_on}
+
+{% icon congratulations %} Congratulations! You now have plots galore!
+
+# Insights into the beyond
+
+Now it's the fun bit! We can see where genes are expressed, and start considering and interpreting the biology of it. At this point, it's really about what information you want to get from your data - the following is only the tip of the iceberg. However, a brief exploration is good, because it may help give you ideas going forward with your own data. Let us start interrogating our data!
+
+> Your results may look different!
+> These tools rely on machine learning, which involves randomisation. While we have used the options to 'set random seed' to 1 where we can, it's not perfect at ensuring every analysis is identical.
+> - Your results may look different.
+> - Your clusters may be in different orders.
+>
+> You will have to adjust your annotation and interpretation accordingly...which is exactly what scientists have to do!
+{: .warning}
+
+## Biological Interpretation
+
+> Appearance is everything
+>
+> Which visualisation is the most useful for getting an overview of our data, *pca*, *tsne*, or *umap*?
+>
+> >
+> >
+> > 
+> > 
+> > 
+> >
+> > You can see why a PCA is generally not enough to see clusters in samples - keep in mind, you're only seeing components 1 and 2! - and therefore why the tSNE and UMAP visualisation dimensionality reductions are so useful. There is not necessarily a clear winner between tSNE and UMAP, but I think UMAP is slightly clearer with its clusters, so we'll stick with that for the rest of the analysis.
+> >
+> {: .solution}
+>
+{: .question}
+
+Note that the cluster numbering is based on size alone - clusters 0 and 1 are not necessarily related, they are just the clusters containing the most cells. It would be nice to know what exactly these cells are. This analysis (googling all of the marker genes, both checking where the ones you know are as well as going through the marker tables you generated!) is a fun task for any individual experiment, so we're going to speed past that and nab the assessment from the original paper!
+
+| Clusters | Marker | Cell type |
+|----------|------------------------------|----------------------------------|
+| 3 | Il2ra | Double negative (early T-cell) |
+| 0,1,4 | Cd8b1, Cd8a, Cd4 | Double positive (middle T-cell) |
+| 5 | Cd8b1, Cd8a, Cd4 - high | Double positive (late middle T-cell) |
+| 2 | Itm2a | Mature T-cell |
+| 6 | Aif1 | Macrophages |
+
+
+
+{% icon warning %} Remember, **your clusters may be in a different order!** Look for the expression of the marker genes in order to annotate your clusters.
+
+>
+>
+> Let's consider how you might handle a different output. I personally re-ran the same workflow from this tutorial five times and got two different results. Here's one of the other outputs I got.
+> 
+>
+> 1. What is different about that plot?
+> 2. How would you adjust your annotation?
+>
+> >
+> >
+> > 1. While the cells are in the same places (which may not always be the case!), the clustering is different. The large Double positive (middle T-cell) cluster has more evenly divided into three clusters, which has therefore changed the ordering of cluster size.
+> >
+> > 2. The cluster annotation would be different:
+> > | Clusters | Marker | Cell type |
+> > |----------|------------------------------|----------------------------------|
+> > | 2 | Il2ra | Double negative (early T-cell) |
+> > | 0,1,5 | Cd8b1, Cd8a, Cd4 | Double positive (middle T-cell)|
+> > | 4 | Cd8b1, Cd8a, Cd4 - high | Double positive (late middle T-cell)
+> > | 3 | Itm2a | Mature T-cell
+> > | 6 | Aif1 | Macrophages |
+> >
+> {: .solution}
+{: .question}
+
+The authors weren't interested in further annotation of the DP cells, so neither are we. Sometimes that just happens. The maths tries to call similar (ish) sized clusters, whether it is biologically relevant or not. Or, the question being asked doesn't really require such granularity of clusters.
+
+> Working in a group? Important!
+> If you have deviated from any of the original parameters in this tutorial, you will likely have a different number of clusters. You will, therefore, need to change the upcoming 'Annotating clusters' *"Comma-separated list of new categories"* accordingly. Best of luck!
+>
+{: .details}
+
+### Annotating Clusters
+
+To annotate the clusters, we write a list of new cluster names in order from Cluster 0 onwards. In this case, that list is: `DP-M3,DP-M1,T-mat,DN,DP-M2,DP-L,Macrophages`
+
+>
+>
+> Imagine you had that second version of an analysis shared above.
+> The cluster annotation was different:
+> | Clusters | Marker | Cell type |
+> |----------|------------------------------|----------------------------------|
+> | 2 | Il2ra | Double negative (early T-cell) |
+> | 0,1,5 | Cd8b1, Cd8a, Cd4 | Double positive (middle T-cell)|
+> | 4 | Cd8b1, Cd8a, Cd4 - high | Double positive (late middle T-cell)
+> | 3 | Itm2a | Mature T-cell
+> | 6 | Aif1 | Macrophages |
+>
+> 1. What would your cluster names list look like?
+>
+> >
+> >
+> > 1. Given this new order, your list would be: `DP-M3,DP-M1,DN,T-mat,DP-L,DP-M2,Macrophages`
+> >
+> {: .solution}
+{: .question}
+
+Adjust your list according to the expression of the gene markers.
+
+> Annotating clusters
+>
+> 1. {% tool [Manipulate AnnData](toolshed.g2.bx.psu.edu/repos/iuc/anndata_manipulate/anndata_manipulate/0.10.9+galaxy1) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `DEG_Object`
+> - *"Function to manipulate the object"*: `Rename categories of annotation`
+> - *"Key for observations or variables annotation"*: `louvain`
+> - *"Comma-separated list of new categories"*: `DP-M3,DP-M1,T-mat,DN,DP-M2,DP-L,Macrophages`
+> - *"Add categories to a new key?"*: `Yes`
+> - *"Key name"*: `cell_type`
+>
+> 2. **Rename** {% icon galaxy-pencil %} output h5ad `Annotated_Object`
+>
+> Now, it's time to re-plot with these annotations!
+>
+> 3. {% tool [Scanpy plot](toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2) %} with the following parameters:
+> - {% icon param-file %} *"Annotated data matrix"*: `Annotated_Object`
+> - *"Method used for plotting"*: `Embeddings: Scatter plot in UMAP basis, using 'pl.umap'`
+> - *"Keys for annotations of observations/cells or variables/genes"*: `batch,Il2ra,Itm2a,sex,Cd8b1,Cd8a,Cd4,genotype,Aif1,Hba-a1,log1p_total_counts,cell_type`
+> - *"Key for field in '.var' that stores gene symbols"*: `Symbol`
+>
+> 4. **Rename** {% icon galaxy-pencil %} output plot `Annotated_Plots`
+>
+{: .hands_on}
+
+
+
+Now that we know what we're dealing with, let's examine the effect of our variable, proper science!
+
+> Genotype
+>
+> Are there any differences in genotype? Or in biological terms, is there an impact of growth restriction on T-cell development in the thymus?
+>
+> 
+>
+> >
+> >
+> > We can see that DP-L, which seems to be extending away from the DP-M bunch, as well as the mature T-cells (or particularly the top half) are missing some knockout cells. Perhaps there is some sort of inhibition here? INTERESTING! What next? We might look further at the transcripts present in both those populations, and perhaps also look at the genotype marker table... So much to investigate! But before we set you off to explore to your heart's delight, let's also look at this a bit more technically.
+> >
+> {: .solution}
+>
+{: .question}
+
+## Technical Assessment
+
+Is our analysis real? Is it right? Well, we can assess that a little bit.
+
+> Batch effect
+>
+> Is there a batch effect?
+>
+> 
+>
+> >
+> >
+> > While some shifts are expected and nothing to be concerned about, DP-L looks to be mainly comprised of N705. There might be a bit of batch effect, so you could consider using batch correction on this dataset. However, if we focus our attention on the other cluster - mature T-cells - where there is batch mixing, we can still assess this biologically even without batch correction.
+> > Additionally, we will also look at the confounding effect of sex.
+> >
+> > 
+> >
+> > We note that the one female sample - unfortunately one of the mere three knockout samples - seems to be distributed in the same areas as the knockout samples at large, so luckily, this doesn't seem to be a confounding factor and we can still learn from our data. Ideally, this experiment would be re-run either with more female samples all around or with this female sample swapped out for a male one.
+> >
+> {: .solution}
+>
+{: .question}
+
+> Depth effect
+>
+> Are there any clusters or differences being driven by sequencing depth, a technical and random factor?
+>
+> 
+>
+> >
+> >
+> > Eureka! This explains the odd DP shift between wildtype and knockout cells - the DP cells on the left side simply have a higher sequencing depth (UMIs/cell) than the ones on the right side. Well, that explains some of the sub-clustering that we're seeing in that splurge. Importantly, we don't see that the DP-L or (mostly) the mature T-cell clusters are similarly affected. So, whilst again, this variable of sequencing depth might be something to regress out somehow, it doesn't seem to be impacting our dataset. The less you can regress/modify your data, in general, the better - you want to stay as true as you can to the raw data, and only use maths to correct your data when you really need to (and not to create insights where there are none!).
+> >
+> {: .solution}
+>
+{: .question}
+
+> Sample purity
+>
+> Do you think we processed these samples well enough?
+>
+> 
+>
+> >
+> >
+> > We have seen in the previous images that these clusters are not very tight or distinct, so we could consider stronger filtering. Additionally, hemoglobin - a red blood cell marker that should NOT be found in T-cells - appears throughout the entire sample in low numbers. This suggests some background in the media the cells were in, and we might consider in the wet lab trying to get a purer, happier sample, or in the dry lab, techniques such as SoupX or others to remove this background. Playing with filtering settings (increasing minimum counts/cell, etc.) is often the place to start in these scenarios.
+> >
+> {: .solution}
+>
+{: .question}
+
+> Clustering resolution
+>
+> Do you think the clustering is appropriate? i.e. are there single clusters that you think should be separate, or multiple clusters that could be combined?
+>
+> 
+>
+> >
+> >
+> > Important to note, lest all bioinformaticians combine forces to attack the biologists: just because a cluster doesn't look like a cluster by eye is NOT enough to say it's not a cluster! But looking at the biology here, we struggled to find marker genes to distinguish the DP population, which we know is also affected by depth of sequencing. That's a reasonable argument that DP-M1, DP-M2, and DP-M3 might not be all that different. Maybe we need more depth of sequencing across all the DP cells, or to compare these explicitly to each other (consider variations on FindMarkers!). However, DP-L is both seemingly leaving the DP cluster and also has fewer knockout cells, so we might go and look at what DP-L is expressing in the marker genes. If we look at T-mat further, we can see that its marker gene - Itm2a - is only expressed in half of the cluster. You might consider sub-clustering this to investigate further, either through changing the resolution or through analysing this cluster alone.
+> > If we look at the differences between genotypes alone (so the pseudo-bulk), we can see that most of the genes in that list are actually ribosomal. This might be a housekeeping background, this might be cell cycle related, this might be biological, or all three. You might consider investigating the cycling status of the cells, or even regressing this out (which is what the authors did).
+> {: .solution}
+>
+{: .question}
+
+Ultimately, there are quite a lot of ways to analyse the data, both within the confines of this tutorial (the many parameters that could be changed throughout) and outside of it (batch correction, sub-clustering, cell-cycle scoring, inferred trajectories, etc.). Most analyses will still yield the same general output, though: there are fewer knockout cells in the mature T-cell population.
+
+{% icon congratulations %} Congratulations! You have interpreted your plots in several important ways!
+
+# Interactive visualisations
+
+Before we leave you to explore the unknown, you might have noticed that the above interpretations are only a few of the possible options. Plus you might have had fun trying to figure out which sample is which genotype is which sex and flicking back and forth between plots repeatedly. Figuring out which plots will be your *final publishable* plots takes a lot of time and testing. Luckily, there is a helpful interactive viewer {% cite Cakir2020 %} export tool that can help you explore without having to produce new plots over and over!
+
+> Cellxgene
+>
+> 1. {% tool [Interactive CELLxGENE VIP Environment](interactive_tool_cellxgene_vip) %} with the following parameters:
+> - {% icon param-file %} *"Concatenate dataset"*: `Annotated_Object`
+> - *"Var field for gene symbols"*: `Symbol`
+> - *"Make specified var field unique"*: {% icon galaxy-toggle %} **Yes**
+>
+> 2. When ready, you will see a message
+> - {% icon details %} *There is an InteractiveTool result view available, click here to display* <---- Click there!
+>
+> Sometimes this link can aggravate a firewall or something similar. It should be fine to go to the site.
+>
+> 3. You will be asked to `name your annotation`. Do so, then you can start playing around!
+>
+> 4. You will need to `STOP` this active environment in Galaxy by going to `User`, `Interactive Tools`, selecting the environment, and selecting `Stop`. You may also want to delete the dataset in the history, because otherwise it continues appearing as if it's processing.
+>
+{: .hands_on}
+
+Be warned - this visualisation tool is a powerful option for exploring your data, but it takes some time to get used to. Consider exploring it as your own tutorial for another day!
+
+
+# Conclusion
+
+
+> Working in a group? The finale!
+> Hopefully, no matter which pathway of analysis you took, you found the same general interpretations. If not, this is a good time to discuss and consider with your group why that might be - what decision was 'wrong' or 'ill-advised', and how would you go about ensuring you correctly interpreted your data in the future? Top tip - trial and error is a good idea, believe it or not, and the more ways you find the same insight, the more confident you can be! But nothing beats experimental validation...
+> For those that did not take the 'control' options, please
+> 1. **Rename** your history (by clicking on the history title) as `DECISION-Filtering and Plotting Single-cell RNA-seq Data`
+> 2. Add a history annotation {% icon history-annotate %} that includes which parameters you changed/steps you changed from the *control*
+>
+> {% snippet faqs/galaxy/histories_sharing.md %}
+>
+> 3. Feel free to explore any other similar histories
+{: .details}
+
+{% icon congratulations %} Congratulations! You've made it to the end!
+
+You might find the {% icon galaxy-history-answer %} *Answer Key Histories* helpful to check or compare with:
+
+{% for h in page.answer_histories %}
+  - [ {{h.label}} ]( {{h.history}} )
+{% endfor %}
+
+You can also run this entire tutorial via a {% icon galaxy-workflows-activity %} *Workflow*, after performing the **Get data** step initially.
+ - [Tutorial Workflow]({% link topics/single-cell/tutorials/scrna-case_basic-pipeline/workflows/ %})
+
+
+
+In this tutorial, you moved from technical processing to biological exploration. By analysing real data - both the exciting and the messy! - you have, hopefully, experienced what it's like to analyse and question a dataset, potentially without clear cut-offs or clear answers. If you were working in a group, you each analysed the data in different ways, and most likely found similar insights. One of the biggest problems in analysing scRNA-seq is the lack of a clearly defined pathway or parameters. You have to make the best call you can as you move through your analysis, and ultimately, when in doubt, try it multiple ways and see what happens!
+
+{% snippet topics/single-cell/faqs/user_community_join.md %}
diff --git a/topics/single-cell/tutorials/spatial_Vizgen_squidpy/workflows/Galaxy-Workflow-Vizgen_analysis_using_Squidpy_Part1.ga b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/workflows/Galaxy-Workflow-Vizgen_analysis_using_Squidpy_Part1.ga
new file mode 100644
index 00000000000000..5b32982ea2768c
--- /dev/null
+++ b/topics/single-cell/tutorials/spatial_Vizgen_squidpy/workflows/Galaxy-Workflow-Vizgen_analysis_using_Squidpy_Part1.ga
@@ -0,0 +1,635 @@
+{
+ "a_galaxy_workflow": "true",
+ "annotation": "",
+ "comments": [],
+ "format-version": "0.1",
+ "name": "Vizgen analysis using Squidpy Part1",
+ "report": {
+ "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"
+ },
+ "steps": {
+ "0": {
+ "annotation": "",
+ "content_id": null,
+ "errors": null,
+ "id": 0,
+ "input_connections": {},
+ "inputs": [
+ {
+ "description": "",
+ "name": "vizgen_vpt_out_from_amir.h5ad"
+ }
+ ],
+ "label": "vizgen_vpt_out_from_amir.h5ad",
+ "name": "Input dataset",
+ "outputs": [],
+ "position": {
+ "left": 0,
+ "top": 170
+ },
+ "tool_id": null,
+ "tool_state": "{\"optional\": false, \"tag\": null}",
+ "tool_version": null,
+ "type": "data_input",
+ "uuid": "9689bf55-81a6-4a0a-9667-789bc18e86cf",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "1": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy2",
+ "errors": null,
+ "id": 1,
+ "input_connections": {
+ "adata": {
+ "id": 0,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy Inspect and manipulate",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 300,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "3b5ef3b00cdd",
+ "name": "scanpy_inspect",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.calculate_qc_metrics\", \"__current_case__\": 0, \"expr_type\": \"counts\", \"var_type\": \"genes\", \"qc_vars\": \"\", \"percent_top\": \"20,40,60,80,120\", \"layer\": \"\", \"use_raw\": false, \"log1p\": true}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "b82be9c3-40d1-4c3f-8447-fed340a4b42a",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "2": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3",
+ "errors": null,
+ "id": 2,
+ "input_connections": {
+ "adata": {
+ "id": 1,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy filter",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 600,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3",
+ "tool_shed_repository": {
+ "changeset_revision": "5813f9920877",
+ "name": "scanpy_filter",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.filter_cells\", \"__current_case__\": 0, \"filter\": {\"filter\": \"min_counts\", \"__current_case__\": 0, \"min_counts\": \"10\"}}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy3",
+ "type": "tool",
+ "uuid": "b559d7d0-48f4-498e-bd9c-de3c03b1b5b6",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "3": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3",
+ "errors": null,
+ "id": 3,
+ "input_connections": {
+ "adata": {
+ "id": 2,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy filter",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 900,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3",
+ "tool_shed_repository": {
+ "changeset_revision": "5813f9920877",
+ "name": "scanpy_filter",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.highly_variable_genes\", \"__current_case__\": 4, \"flavor\": {\"flavor\": \"seurat_v3\", \"__current_case__\": 2, \"n_top_genes\": \"130\", \"span\": \"0.3\"}, \"n_bins\": \"20\", \"subset\": false, \"layer\": \"\", \"batch_key\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy3",
+ "type": "tool",
+ "uuid": "138aaa62-b577-4f7d-b61c-91c2fa4970ff",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "4": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_normalize/scanpy_normalize/1.10.2+galaxy2",
+ "errors": null,
+ "id": 4,
+ "input_connections": {
+ "adata": {
+ "id": 3,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy normalize",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 1200,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_normalize/scanpy_normalize/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "4766556db52c",
+ "name": "scanpy_normalize",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.normalize_total\", \"__current_case__\": 0, \"target_sum\": null, \"exclude_highly_expressed\": {\"exclude_highly_expressed\": \"False\", \"__current_case__\": 1}, \"key_added\": \"\", \"layer\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "1c1114e1-d5b2-4db0-9889-b4ca7e432378",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "5": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy2",
+ "errors": null,
+ "id": 5,
+ "input_connections": {
+ "adata": {
+ "id": 4,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy Inspect and manipulate",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 1500,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "3b5ef3b00cdd",
+ "name": "scanpy_inspect",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.log1p\", \"__current_case__\": 6, \"base\": null, \"layer\": \"\", \"obsm\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "cc81fd04-ed0d-46b1-8bcc-576f7381b303",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "6": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2",
+ "errors": null,
+ "id": 6,
+ "input_connections": {
+ "adata": {
+ "id": 5,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy cluster, embed",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 1800,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "a84a73471be7",
+ "name": "scanpy_cluster_reduce_dimension",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.pca\", \"__current_case__\": 2, \"n_comps\": \"50\", \"layer\": \"\", \"dtype\": \"float32\", \"pca\": {\"chunked\": \"False\", \"__current_case__\": 1, \"zero_center\": true, \"svd_solver\": null, \"random_state\": \"0\"}, \"mask_var\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "85271e6e-a1ee-4629-a906-42a2babf8f28",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "7": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy2",
+ "errors": null,
+ "id": 7,
+ "input_connections": {
+ "adata": {
+ "id": 6,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy Inspect and manipulate",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 2100,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_inspect/scanpy_inspect/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "3b5ef3b00cdd",
+ "name": "scanpy_inspect",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.neighbors\", \"__current_case__\": 1, \"n_neighbors\": \"15\", \"n_pcs\": null, \"use_rep\": \"\", \"knn\": true, \"pp_neighbors_method\": \"umap\", \"metric\": \"euclidean\", \"random_state\": \"0\", \"key_added\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "e6f27f77-9dd2-483e-ada4-506b53c63a42",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "8": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2",
+ "errors": null,
+ "id": 8,
+ "input_connections": {
+ "adata": {
+ "id": 7,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy cluster, embed",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 2400,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "a84a73471be7",
+ "name": "scanpy_cluster_reduce_dimension",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"tl.umap\", \"__current_case__\": 5, \"min_dist\": \"0.5\", \"spread\": \"1.0\", \"n_components\": \"2\", \"maxiter\": null, \"alpha\": \"1.0\", \"gamma\": \"1.0\", \"negative_sample_rate\": \"5\", \"init_pos\": \"spectral\", \"random_state\": \"0\", \"neighbors_key\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "66b0b7cb-b0d5-453b-bcc0-0e5b2cda6975",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "9": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2",
+ "errors": null,
+ "id": 9,
+ "input_connections": {
+ "adata": {
+ "id": 8,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy cluster, embed",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 2700,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_cluster_reduce_dimension/scanpy_cluster_reduce_dimension/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "a84a73471be7",
+ "name": "scanpy_cluster_reduce_dimension",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"tl.leiden\", \"__current_case__\": 1, \"resolution\": \"1.0\", \"random_state\": \"0\", \"key_added\": \"leiden\", \"directed\": false, \"use_weights\": true, \"n_iterations\": \"-1\", \"neighbors_key\": \"\", \"flavor\": \"leidenalg\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "5a6663f5-3cf0-491c-b5e5-6f893f979541",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "10": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy_scatter/squidpy_scatter/1.5.0+galaxy2",
+ "errors": null,
+ "id": 10,
+ "input_connections": {
+ "anndata": {
+ "id": 9,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Create spatial scatterplot",
+ "outputs": [
+ {
+ "name": "output_plot",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 3000,
+ "top": 0
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy_scatter/squidpy_scatter/1.5.0+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "b84c324b58bd",
+ "name": "squidpy_scatter",
+ "owner": "goeckslab",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"aesthetic_opts\": {\"size\": \"3\", \"alpha\": \"1.0\", \"cmap\": \"viridis\", \"colorbar\": true, \"scalebar_dx\": null, \"scalebar_units\": \"\", \"title\": null, \"figsize\": \"\", \"dpi\": \"300\"}, \"anndata\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"color\": \"leiden\", \"legend_opts\": {\"legend_loc\": \"right margin\", \"legend_fontsize\": \"8\", \"legend_fontweight\": \"bold\", \"legend_fontoutline\": null, \"legend_na\": true}, \"plot_opts\": {\"groups\": null, \"use_raw\": false, \"crop_coord\": null}, \"x_coord\": \"X_centroid\", \"y_coord\": \"Y_centroid\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.5.0+galaxy2",
+ "type": "tool",
+ "uuid": "b40a4997-ebfb-4253-b594-fe6bc8ac8b2b",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "11": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2",
+ "errors": null,
+ "id": 11,
+ "input_connections": {
+ "adata": {
+ "id": 9,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy plot",
+ "outputs": [
+ {
+ "name": "out_png",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 3000,
+ "top": 170
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_plot/scanpy_plot/1.10.2+galaxy2",
+ "tool_shed_repository": {
+ "changeset_revision": "5a5468c0d26f",
+ "name": "scanpy_plot",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"format\": \"png\", \"method\": {\"method\": \"pl.umap\", \"__current_case__\": 16, \"color\": \"total_counts,n_genes_by_counts,leiden\", \"gene_symbols\": \"\", \"use_raw\": false, \"edges\": {\"edges\": \"False\", \"__current_case__\": 1}, \"arrows\": false, \"sort_order\": true, \"groups\": \"\", \"plot\": {\"components\": [], \"projection\": \"2d\", \"legend_loc\": \"right margin\", \"legend_fontsize\": null, \"legend_fontweight\": \"bold\", \"size\": null, \"color_map\": null, \"palette\": null, \"frameon\": true, \"ncols\": \"4\", \"wspace\": \"0.1\", \"hspace\": \"0.25\", \"title\": \"\"}, \"add_outline\": false, \"outline_color_border\": null, \"outline_color_gap\": null, \"outline_width_border\": \"0.4\", \"outline_width_gap\": \"0.05\", \"matplotlib_pyplot_scatter\": {\"alpha\": null, \"vmin\": null, \"vmax\": null, \"linewidths\": \"0.0\", \"edgecolors\": null}, \"layer\": \"\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy2",
+ "type": "tool",
+ "uuid": "267a871c-647d-4d61-b8b7-3b88f447fcfc",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "12": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy/squidpy_spatial/1.5.0+galaxy0",
+ "errors": null,
+ "id": 12,
+ "input_connections": {
+ "anndata": {
+ "id": 9,
+ "output_name": "anndata_out"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Analyze and visualize spatial multi-omics data",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 3000,
+ "top": 340
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy/squidpy_spatial/1.5.0+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "11ea000ad53f",
+ "name": "squidpy",
+ "owner": "goeckslab",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"analyses\": {\"selected_tool\": \"spatial_neighbors\", \"__current_case__\": 0, \"options\": {\"spatial_key\": \"spatial\", \"coord_type\": \"generic\", \"n_neighs\": \"6\", \"radius\": \"\", \"delaunay\": true, \"n_rings\": \"1\", \"transform\": \"none\", \"set_diag\": false, \"key_added\": \"spatial\"}}, \"anndata\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.5.0+galaxy0",
+ "type": "tool",
+ "uuid": "bb8b3c89-26ed-4058-8541-f46943d93539",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "13": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy/squidpy_spatial/1.5.0+galaxy0",
+ "errors": null,
+ "id": 13,
+ "input_connections": {
+ "anndata": {
+ "id": 12,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Analyze and visualize spatial multi-omics data",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "h5ad"
+ },
+ {
+ "name": "output_plot",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 3300,
+ "top": 340
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy/squidpy_spatial/1.5.0+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "11ea000ad53f",
+ "name": "squidpy",
+ "owner": "goeckslab",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"analyses\": {\"selected_tool\": \"centrality_scores\", \"__current_case__\": 3, \"cluster_key\": \"leiden\", \"options\": {\"score\": \"none\", \"connectivity_key\": \"\"}, \"plotting_options\": {\"score\": \"\", \"palette\": \"YlOrRd\", \"figsize\": \"\", \"dpi\": null}}, \"anndata\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.5.0+galaxy0",
+ "type": "tool",
+ "uuid": "57555c9e-e5b3-4176-9cc5-19719bdb3e5f",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "14": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy/squidpy_spatial/1.5.0+galaxy0",
+ "errors": null,
+ "id": 14,
+ "input_connections": {
+ "anndata": {
+ "id": 13,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Analyze and visualize spatial multi-omics data",
+ "outputs": [
+ {
+ "name": "output",
+ "type": "h5ad"
+ },
+ {
+ "name": "output_plot",
+ "type": "png"
+ }
+ ],
+ "position": {
+ "left": 3600,
+ "top": 340
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/goeckslab/squidpy/squidpy_spatial/1.5.0+galaxy0",
+ "tool_shed_repository": {
+ "changeset_revision": "11ea000ad53f",
+ "name": "squidpy",
+ "owner": "goeckslab",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"analyses\": {\"selected_tool\": \"ripley\", \"__current_case__\": 5, \"cluster_key\": \"leiden\", \"options\": {\"mode\": \"F\", \"spatial_key\": \"spatial\", \"metric\": \"euclidean\", \"n_neigh\": \"2\", \"n_simulations\": \"100\", \"n_observations\": \"1000\", \"max_dist\": null, \"n_steps\": \"50\", \"seed\": null}, \"plotting_options\": {\"mode\": \"F\", \"plot_sims\": true, \"palette\": \"YlOrRd\", \"figsize\": \"\", \"dpi\": null}}, \"anndata\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.5.0+galaxy0",
+ "type": "tool",
+ "uuid": "192209cd-d3b4-4ffc-9503-ff63e4db855e",
+ "when": null,
+ "workflow_outputs": []
+ },
+ "15": {
+ "annotation": "",
+ "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3",
+ "errors": null,
+ "id": 15,
+ "input_connections": {
+ "adata": {
+ "id": 13,
+ "output_name": "output"
+ }
+ },
+ "inputs": [],
+ "label": null,
+ "name": "Scanpy filter",
+ "outputs": [
+ {
+ "name": "anndata_out",
+ "type": "h5ad"
+ }
+ ],
+ "position": {
+ "left": 3600,
+ "top": 560
+ },
+ "post_job_actions": {},
+ "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/scanpy_filter/scanpy_filter/1.10.2+galaxy3",
+ "tool_shed_repository": {
+ "changeset_revision": "5813f9920877",
+ "name": "scanpy_filter",
+ "owner": "iuc",
+ "tool_shed": "toolshed.g2.bx.psu.edu"
+ },
+ "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"4ffe3872d04e11f08b46b42e99728f38\", \"adata\": {\"__class__\": \"ConnectedValue\"}, \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"pp.subsample\", \"__current_case__\": 5, \"type\": {\"type\": \"fraction\", \"__current_case__\": 0, \"fraction\": \"0.5\"}, \"random_state\": \"0\"}, \"__page__\": 0, \"__rerun_remap_job_id__\": null}",
+ "tool_version": "1.10.2+galaxy3",
+ "type": "tool",
+ "uuid": "a0d3b539-26e7-4f33-a0df-42b0c480462d",
+ "when": null,
+ "workflow_outputs": []
+ }
+ },
+ "tags": [],
+ "uuid": "2ca53904-6e7e-43e1-a769-e873d8636c61",
+ "version": 1
+}
\ No newline at end of file