diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index 185f6678d3..9496e6000c 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -210,6 +210,7 @@ jobs:
           pip install -U pip
           pip install --group ./firedrake-repo/pyproject.toml:ci
 
+          pip install -I "firedrake-fiat @ git+https://github.com/firedrakeproject/fiat.git@pbrubeck/fix/permutations"
           firedrake-clean
           pip list
 
diff --git a/firedrake/assemble.py b/firedrake/assemble.py
index 04e4bd56b1..95b70e5824 100644
--- a/firedrake/assemble.py
+++ b/firedrake/assemble.py
@@ -353,7 +353,9 @@ def allocate(self):
             else:
                 test, trial = self._form.arguments()
                 sparsity = ExplicitMatrixAssembler._make_sparsity(test, trial, self._mat_type, self._sub_mat_type, self.maps_and_regions)
-                return matrix.Matrix(self._form, self._bcs, self._mat_type, sparsity, ScalarType, options_prefix=self._options_prefix)
+                return matrix.Matrix(self._form, self._bcs, self._mat_type, sparsity, ScalarType,
+                                     sub_mat_type=self._sub_mat_type,
+                                     options_prefix=self._options_prefix)
         else:
             raise NotImplementedError("Only implemented for rank = 2 and diagonal = False")
 
@@ -1295,12 +1297,12 @@ def _get_mat_type(mat_type, sub_mat_type, arguments):
                for arg in arguments
                for V in arg.function_space()):
             mat_type = "nest"
-    if mat_type not in {"matfree", "aij", "baij", "nest", "dense"}:
+    if mat_type not in {"matfree", "aij", "baij", "nest", "dense", "is"}:
         raise ValueError(f"Unrecognised matrix type, '{mat_type}'")
     if sub_mat_type is None:
         sub_mat_type = parameters.parameters["default_sub_matrix_type"]
-    if sub_mat_type not in {"aij", "baij"}:
-        raise ValueError(f"Invalid submatrix type, '{sub_mat_type}' (not 'aij' or 'baij')")
+    if sub_mat_type not in {"aij", "baij", "is"}:
+        raise ValueError(f"Invalid submatrix type, '{sub_mat_type}' (not 'aij', 'baij', or 'is')")
     return mat_type, sub_mat_type
 
 
@@ -1344,6 +1346,7 @@ def allocate(self):
                                                           self._sub_mat_type,
                                                           self._make_maps_and_regions())
         return matrix.Matrix(self._form, self._bcs, self._mat_type, sparsity, ScalarType,
+                             sub_mat_type=self._sub_mat_type,
                              options_prefix=self._options_prefix,
                              fc_params=self._form_compiler_params)
 
@@ -1366,6 +1369,18 @@ def _make_sparsity(test, trial, mat_type, sub_mat_type, maps_and_regions):
         except SparsityFormatError:
             raise ValueError("Monolithic matrix assembly not supported for systems "
                              "with R-space blocks")
+
+        # TODO reconstruct dof_dset with the unghosted lgmap
+        if mat_type == "is":
+            rmap = unghosted_lgmap(sparsity._dsets[0].lgmap, test.function_space())
+            cmap = unghosted_lgmap(sparsity._dsets[1].lgmap, trial.function_space())
+            sparsity._lgmaps = (rmap, cmap)
+        elif mat_type == "nest" and sub_mat_type == "is":
+            for i, j in numpy.ndindex(sparsity.shape):
+                block = sparsity[i, j]
+                rmap = unghosted_lgmap(block._dsets[0].lgmap, test.function_space()[i])
+                cmap = unghosted_lgmap(block._dsets[1].lgmap, trial.function_space()[j])
+                block._lgmaps = (rmap, cmap)
         return sparsity
 
     def _make_maps_and_regions(self):
@@ -1461,7 +1476,6 @@ def _apply_bc(self, tensor, bc, u=None):
             # block is on the matrix diagonal and its index matches the
             # index of the function space the bc is defined on.
             op2tensor[index, index].set_local_diagonal_entries(bc.nodes, idx=component, diag_val=self.weight)
-
             # Handle off-diagonal block involving real function space.
             # "lgmaps" is correctly constructed in _matrix_arg, but
             # is ignored by PyOP2 in this case.
@@ -2177,3 +2191,49 @@ def index_function_spaces(form, indices):
             return tuple(a.ufl_function_space()[i] for i, a in zip(indices, form.arguments()))
         else:
             raise AssertionError
+
+
+def masked_lgmap(lgmap, mask, block=True):
+    if block:
+        indices = lgmap.block_indices.copy()
+        bsize = lgmap.getBlockSize()
+    else:
+        indices = lgmap.indices.copy()
+        bsize = 1
+
+    if len(mask) > 0:
+        indices[mask] = -1
+    return PETSc.LGMap().create(indices=indices, bsize=bsize, comm=lgmap.comm)
+
+
+def unghosted_lgmap(lgmap, V, block=True):
+    if block:
+        ndofs = lgmap.getBlockIndices().size
+    else:
+        ndofs = lgmap.getIndices().size
+    mask = numpy.arange(ndofs, dtype=PETSc.IntType)
+
+    mesh = V._mesh
+    mesh_dm = mesh.topology_dm
+    start, end = mesh_dm.getHeightStratum(0)
+    for i, W in enumerate(V):
+        iset = V.dof_dset.local_ises[i]
+        W_local_indices = iset.indices
+        bsize = 1 if block else iset.getBlockSize()
+        section = W.dm.getDefaultSection()
+        for seed in range(start, end):
+            # Do not loop over ghost cells
+            if mesh_dm.getLabelValue("pyop2_ghost", seed) != -1:
+                continue
+            closure, _ = mesh_dm.getTransitiveClosure(seed, useCone=True)
+            for p in closure:
+                dof = section.getDof(p)
+                if dof <= 0:
+                    continue
+                off = section.getOffset(p)
+                # Local indices within W
+                W_indices = slice(bsize * off, bsize * (off + dof))
+                mask[W_local_indices[W_indices]] = -1
+
+    mask = mask[mask > -1]
+    return masked_lgmap(lgmap, mask, block=block)
diff --git a/firedrake/matrix_free/operators.py b/firedrake/matrix_free/operators.py
index 18a767557d..12a5611945 100644
--- a/firedrake/matrix_free/operators.py
+++ b/firedrake/matrix_free/operators.py
@@ -377,11 +377,20 @@ def createSubMatrix(self, mat, row_is, col_is, target=None):
         row_ises = self._y.function_space().dof_dset.field_ises
         col_ises = self._x.function_space().dof_dset.field_ises
 
-        row_inds = find_sub_block(row_is, row_ises, comm=self.comm)
-        if row_is == col_is and row_ises == col_ises:
-            col_inds = row_inds
-        else:
-            col_inds = find_sub_block(col_is, col_ises, comm=self.comm)
+        try:
+            row_inds = find_sub_block(row_is, row_ises, comm=self.comm)
+            if row_is == col_is and row_ises == col_ises:
+                col_inds = row_inds
+            else:
+                col_inds = find_sub_block(col_is, col_ises, comm=self.comm)
+        except LookupError:
+            # No match for sub_block
+            # use default PETSc implementation via MATSHELL
+            popmethod = self.createSubMatrix
+            self.createSubMatrix = None
+            submat = mat.createSubMatrix(row_is, col_is)
+            self.createSubMatrix = popmethod
+            return submat
 
         splitter = ExtractSubBlock()
         asub = splitter.split(self.a,
diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py
index 57580b6f86..24a4c7afc2 100644
--- a/firedrake/preconditioners/bddc.py
+++ b/firedrake/preconditioners/bddc.py
@@ -4,8 +4,11 @@
 from firedrake.petsc import PETSc
 from firedrake.dmhooks import get_function_space, get_appctx
 from firedrake.ufl_expr import TestFunction, TrialFunction
+from firedrake.function import Function
 from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace
-from ufl import curl, div, HCurl, HDiv, inner, dx
+from firedrake.preconditioners.fdm import tabulate_exterior_derivative
+from firedrake.preconditioners.hiptmair import curl_to_grad
+from ufl import H1, H2, inner, dx, JacobianDeterminant
 from pyop2.utils import as_tuple
 import numpy
 
@@ -23,17 +26,13 @@ class BDDCPC(PCBase):
     - ``'bddc_pc_bddc_dirichlet'`` to set sub-KSPs on subdomain interiors,
     - ``'bddc_pc_bddc_coarse'`` to set the coarse solver KSP.
 
-    This PC also inspects optional arguments supplied in the application context:
-    - ``'discrete_gradient'`` for problems in H(curl), this sets the arguments
-    (a Mat tabulating the gradient of the auxiliary H1 space) and
+    This PC also inspects optional callbacks supplied in the application context:
+    - ``'get_discrete_gradient'`` for 3D problems in H(curl), this is a callable that
+    provide the arguments (a Mat tabulating the gradient of the auxiliary H1 space) and
     keyword arguments supplied to ``PETSc.PC.setBDDCDiscreteGradient``.
-    - ``'divergence_mat'`` for 3D problems in H(div), this sets the Mat with the
-    assembled bilinear form testing the divergence against an L2 space.
-
-    Notes
-    -----
-    Currently the Mat type IS is only supported by FDMPC.
-
+    - ``'get_divergence_mat'`` for problems in H(div) (resp. 2D H(curl)), this is
+    provide the arguments (a Mat with the assembled bilinear form testing the divergence
+    (curl) against an L2 space) and keyword arguments supplied to ``PETSc.PC.setDivergenceMat``.
     """
 
     _prefix = "bddc_"
@@ -45,6 +44,8 @@ def initialize(self, pc):
         self.prefix = (pc.getOptionsPrefix() or "") + self._prefix
 
         V = get_function_space(dm)
+        variant = V.ufl_element().variant()
+        sobolev_space = V.ufl_element().sobolev_space
 
         # Create new PC object as BDDC type
         bddcpc = PETSc.PC().create(comm=pc.comm)
@@ -53,13 +54,19 @@ def initialize(self, pc):
         bddcpc.setOperators(*pc.getOperators())
         bddcpc.setType(PETSc.PC.Type.BDDC)
 
+        # Allow viewing preconditioning matrix for debugging purposes
+        P.viewFromOptions('-view_pmat', bddcpc)
+
         opts = PETSc.Options(bddcpc.getOptionsPrefix())
-        if V.ufl_element().variant() == "fdm" and "pc_bddc_use_local_mat_graph" not in opts:
-            # Disable computation of disconected components of subdomain interfaces
+        # Do not use CSR of local matrix to define dofs connectivity unless requested
+        # Using the CSR only makes sense for H1/H2 problems
+        is_h1h2 = sobolev_space in [H1, H2]
+        if "pc_bddc_use_local_mat_graph" not in opts and (not is_h1h2 or variant in {"fdm", "demkowicz", "demkowiczmass"}):
             opts["pc_bddc_use_local_mat_graph"] = False
 
+        # Handle boundary dofs
         ctx = get_appctx(dm)
-        bcs = tuple(ctx._problem.bcs)
+        bcs = tuple(ctx._problem.dirichlet_bcs())
         if V.extruded:
             boundary_nodes = numpy.unique(numpy.concatenate(list(map(V.boundary_nodes, ("on_boundary", "top", "bottom")))))
         else:
@@ -70,52 +77,44 @@ def initialize(self, pc):
             dir_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs]))
         neu_nodes = numpy.setdiff1d(boundary_nodes, dir_nodes)
 
-        V.dof_dset.lgmap.apply(dir_nodes, result=dir_nodes)
+        dir_nodes = V.dof_dset.lgmap.apply(dir_nodes)
         dir_bndr = PETSc.IS().createGeneral(dir_nodes, comm=pc.comm)
         bddcpc.setBDDCDirichletBoundaries(dir_bndr)
 
-        V.dof_dset.lgmap.apply(neu_nodes, result=neu_nodes)
+        neu_nodes = V.dof_dset.lgmap.apply(neu_nodes)
         neu_bndr = PETSc.IS().createGeneral(neu_nodes, comm=pc.comm)
         bddcpc.setBDDCNeumannBoundaries(neu_bndr)
 
         appctx = self.get_appctx(pc)
-        sobolev_space = V.ufl_element().sobolev_space
+        degree = max(as_tuple(V.ufl_element().degree()))
+
+        # Set coordinates only if corner selection is requested
+        # There's no API to query from PC
+        if "pc_bddc_corner_selection" in opts:
+            W = VectorFunctionSpace(V.mesh(), "Lagrange", degree, variant=variant)
+            coords = Function(W).interpolate(V.mesh().coordinates)
+            bddcpc.setCoordinates(coords.dat.data_ro.repeat(V.block_size, axis=0))
 
         tdim = V.mesh().topological_dimension()
-        degree = max(as_tuple(V.ufl_element().degree()))
         if tdim >= 2 and V.finat_element.formdegree == tdim-1:
-            B = appctx.get("divergence_mat", None)
-            if B is None:
-                from firedrake.assemble import assemble
-                d = {HCurl: curl, HDiv: div}[sobolev_space]
-                if V.shape == ():
-                    make_function_space = FunctionSpace
-                elif len(V.shape) == 1:
-                    make_function_space = VectorFunctionSpace
-                else:
-                    make_function_space = TensorFunctionSpace
-                Q = make_function_space(V.mesh(), "DG", degree-1)
-                b = inner(d(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1))
-                B = assemble(b, mat_type="matfree")
-            bddcpc.setBDDCDivergenceMat(B.petscmat)
-        elif sobolev_space == HCurl:
-            gradient = appctx.get("discrete_gradient", None)
-            if gradient is None:
-                from firedrake.preconditioners.fdm import tabulate_exterior_derivative
-                from firedrake.preconditioners.hiptmair import curl_to_grad
-                Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element()))
-                gradient = tabulate_exterior_derivative(Q, V)
-                corners = get_vertex_dofs(Q)
-                gradient.compose('_elements_corners', corners)
+            allow_repeated = P.getISAllowRepeated()
+            get_divergence = appctx.get("get_divergence_mat", get_divergence_mat)
+            divergence = get_divergence(V, mat_type="is", allow_repeated=allow_repeated)
+            try:
+                div_args, div_kwargs = divergence
+            except ValueError:
+                div_args = (divergence,)
+                div_kwargs = dict()
+            bddcpc.setBDDCDivergenceMat(*div_args, **div_kwargs)
+
+        elif tdim >= 3 and V.finat_element.formdegree == 1:
+            get_gradient = appctx.get("get_discrete_gradient", get_discrete_gradient)
+            gradient = get_gradient(V)
+            try:
+                grad_args, grad_kwargs = gradient
+            except ValueError:
                 grad_args = (gradient,)
-                grad_kwargs = {'order': degree}
-            else:
-                try:
-                    grad_args, grad_kwargs = gradient
-                except ValueError:
-                    grad_args = (gradient,)
-                    grad_kwargs = dict()
-
+                grad_kwargs = dict()
             bddcpc.setBDDCDiscreteGradient(*grad_args, **grad_kwargs)
 
         bddcpc.setFromOptions()
@@ -134,9 +133,41 @@ def applyTranspose(self, pc, x, y):
         self.pc.applyTranspose(x, y)
 
 
-def get_vertex_dofs(V):
-    W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), "vertex"))
+def get_restricted_dofs(V, domain):
+    W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), domain))
     indices = get_restriction_indices(V, W)
-    V.dof_dset.lgmap.apply(indices, result=indices)
-    vertex_dofs = PETSc.IS().createGeneral(indices, comm=V.comm)
-    return vertex_dofs
+    indices = V.dof_dset.lgmap.apply(indices)
+    return PETSc.IS().createGeneral(indices, comm=V.comm)
+
+
+def get_divergence_mat(V, mat_type="is", allow_repeated=False):
+    from firedrake import assemble
+    degree = max(as_tuple(V.ufl_element().degree()))
+    Q = TensorFunctionSpace(V.mesh(), "DG", 0, variant=f"integral({degree-1})", shape=V.value_shape[:-1])
+    B = tabulate_exterior_derivative(V, Q, mat_type=mat_type, allow_repeated=allow_repeated)
+
+    Jdet = JacobianDeterminant(V.mesh())
+    s = assemble(inner(TrialFunction(Q)*(1/Jdet), TestFunction(Q))*dx(degree=0), diagonal=True)
+    with s.dat.vec as svec:
+        B.diagonalScale(svec, None)
+    return (B,), {}
+
+
+def get_discrete_gradient(V):
+    from firedrake import Constant
+    from firedrake.nullspace import VectorSpaceBasis
+
+    Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element()))
+    gradient = tabulate_exterior_derivative(Q, V)
+    nsp = VectorSpaceBasis([Function(Q).interpolate(Constant(1))])
+    nsp.orthonormalize()
+    gradient.setNearNullSpace(nsp.nullspace())
+    variant = Q.ufl_element().variant()
+    if variant in {"fdm", "demkowicz", "demkowiczmass"}:
+        vdofs = get_restricted_dofs(Q, "vertex")
+        gradient.compose('_elements_corners', vdofs)
+
+    degree = max(as_tuple(Q.ufl_element().degree()))
+    grad_args = (gradient,)
+    grad_kwargs = {'order': degree}
+    return grad_args, grad_kwargs
diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py
index 26e3f984ca..fb71a81816 100644
--- a/firedrake/preconditioners/facet_split.py
+++ b/firedrake/preconditioners/facet_split.py
@@ -1,7 +1,7 @@
 from functools import partial
 from mpi4py import MPI
 from pyop2 import op2, PermutedMap
-from finat.ufl import RestrictedElement, MixedElement, TensorElement, VectorElement
+from finat.ufl import BrokenElement, RestrictedElement, MixedElement, TensorElement, VectorElement
 from firedrake.petsc import PETSc
 from firedrake.preconditioners.base import PCBase
 import firedrake.dmhooks as dmhooks
@@ -206,7 +206,9 @@ def restrict(ele, restriction_domain):
     if isinstance(ele, VectorElement):
         return type(ele)(restrict(ele._sub_element, restriction_domain), dim=ele.num_sub_elements)
     elif isinstance(ele, TensorElement):
-        return type(ele)(restrict(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele._symmety)
+        return type(ele)(restrict(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele._symmetry)
+    elif restriction_domain == "broken":
+        return BrokenElement(ele)
     else:
         return RestrictedElement(ele, restriction_domain)
 
@@ -239,7 +241,7 @@ def restricted_dofs(celem, felem):
     cdofs = celem.entity_dofs()
     fdofs = felem.entity_dofs()
     for dim in sorted(cdofs):
-        for entity in cdofs[dim]:
+        for entity in sorted(cdofs[dim]):
             ndofs = len(cdofs[dim][entity])
             indices[cdofs[dim][entity]] = fdofs[dim][entity][:ndofs]
     return indices
@@ -248,6 +250,9 @@ def restricted_dofs(celem, felem):
 def get_restriction_indices(V, W):
     """Return the list of dofs in the space V such that W = V[indices].
     """
+    if V.cell_node_map() is W.cell_node_map():
+        return numpy.arange(V.dof_dset.layout_vec.getSizes()[0], dtype=PETSc.IntType)
+
     vdat = V.make_dat(val=numpy.arange(V.dof_count, dtype=PETSc.IntType))
     wdats = [Wsub.make_dat(val=numpy.full((Wsub.dof_count,), -1, dtype=PETSc.IntType)) for Wsub in W]
     wdat = wdats[0] if len(W) == 1 else op2.MixedDat(wdats)
diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py
index 042956ca8f..959bf5fd54 100644
--- a/firedrake/preconditioners/fdm.py
+++ b/firedrake/preconditioners/fdm.py
@@ -8,7 +8,7 @@
                                            evaluate_dual,
                                            get_permutation_to_nodal_elements,
                                            cache_generate_code)
-from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs
+from firedrake.preconditioners.facet_split import restrict, restricted_dofs, split_dofs
 from firedrake.formmanipulation import ExtractSubBlock
 from firedrake.functionspace import FunctionSpace, MixedFunctionSpace
 from firedrake.function import Function
@@ -40,33 +40,6 @@
 __all__ = ("FDMPC", "PoissonFDMPC")
 
 
-def broken_function(V, val):
-    W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element()))
-    w = Function(W, dtype=val.dtype)
-    v = Function(V, val=val)
-    domain = "{[i]: 0 <= i < v.dofs}"
-    instructions = """
-    for i
-        w[i] = v[i]
-    end
-    """
-    par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)})
-    return w
-
-
-def mask_local_indices(V, lgmap, repeated=False):
-    mask = lgmap.indices
-    if repeated:
-        w = broken_function(V, mask)
-        V = w.function_space()
-        mask = w.dat.data_ro_with_halos
-    indices = numpy.arange(len(mask), dtype=PETSc.IntType)
-    indices[mask == -1] = -1
-    indices_dat = V.make_dat(val=indices)
-    indices_acc = indices_dat(op2.READ, V.cell_node_map())
-    return indices_acc
-
-
 class FDMPC(PCBase):
     """
     A preconditioner for tensor-product elements that changes the shape
@@ -83,7 +56,7 @@ class FDMPC(PCBase):
     matrices.
 
     The PETSc options inspected by this class are:
-    - 'fdm_mat_type': can be either 'aij' or 'sbaij'
+    - 'fdm_mat_type': can be either 'aij', 'sbaij', or 'is'
     - 'fdm_static_condensation': are we assembling the Schur complement on facets?
     """
 
@@ -134,7 +107,10 @@ def initialize(self, pc):
 
         # Transform the problem into the space with FDM shape functions
         V = J.arguments()[-1].function_space()
-        V_fdm = V.reconstruct(variant=self._variant)
+        if self._variant == "fdm" and V.ufl_element().variant() in {"fdm", "demkowicz", "demkowiczmass"}:
+            V_fdm = V
+        else:
+            V_fdm = V.reconstruct(variant=self._variant)
         if V == V_fdm:
             J_fdm, bcs_fdm = (J, bcs)
         else:
@@ -169,7 +145,7 @@ def initialize(self, pc):
                 self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType)
 
         # Internally, we just set up a PC object that the user can configure
-        # however from the PETSc command line.  Since PC allows the user to specify
+        # however from the PETSc command line. Since PC allows the user to specify
         # a KSP, we can do iterative by -fdm_pc_type ksp.
         fdmpc = PETSc.PC().create(comm=pc.comm)
         fdmpc.incrementTabLevel(1, parent=pc)
@@ -218,15 +194,15 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati
             Vfacet = None
             Vbig = V
             ebig = V.ufl_element()
-            _, fdofs = split_dofs(V.finat_element)
+            idofs, fdofs = split_dofs(V.finat_element)
         elif len(ifacet) == 1:
             Vfacet = V[ifacet[0]]
             ebig, = set(unrestrict_element(Vsub.ufl_element()) for Vsub in V)
             Vbig = FunctionSpace(V.mesh(), ebig)
-            if len(V) > 1:
-                dims = [Vsub.finat_element.space_dimension() for Vsub in V]
-                assert sum(dims) == Vbig.finat_element.space_dimension()
+            space_dim = Vbig.finat_element.space_dimension()
+            assert space_dim == sum(Vsub.finat_element.space_dimension() for Vsub in V)
             fdofs = restricted_dofs(Vfacet.finat_element, Vbig.finat_element)
+            idofs = numpy.setdiff1d(numpy.arange(space_dim, dtype=fdofs.dtype), fdofs)
         else:
             raise ValueError("Expecting at most one FunctionSpace restricted onto facets.")
         self.embedding_element = ebig
@@ -245,7 +221,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati
 
         # Dictionary with kernel to compute the Schur complement
         self.schur_kernel = {}
-        if V == Vbig and Vbig.finat_element.formdegree == 0:
+        if V == Vbig and Vbig.finat_element.formdegree == 0 and len(idofs) > 0 and pmat_type.endswith("aij"):
             # If we are in H(grad), we just pad with zeros on the statically-condensed pattern
             self.schur_kernel[V] = SchurComplementPattern
         elif Vfacet and use_static_condensation:
@@ -710,64 +686,28 @@ def insert_mode(self):
     def assembly_lgmaps(self):
         if self.mat_type != "is":
             return {Vsub: Vsub.dof_dset.lgmap for Vsub in self.V}
-        lgmaps = {}
-        for Vsub in self.V:
-            lgmap = Vsub.dof_dset.lgmap
-            if self.allow_repeated:
-                indices = broken_function(Vsub, lgmap.indices).dat.data_ro
-            else:
-                indices = lgmap.indices.copy()
-                local_indices = numpy.arange(len(indices), dtype=PETSc.IntType)
-                cell_node_map = broken_function(Vsub, local_indices).dat.data_ro
-                ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map), assume_unique=True)
-                indices[ghost] = -1
-            lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm())
-        return lgmaps
+        return {Vsub: unghosted_lgmap(Vsub, Vsub.dof_dset.lgmap, self.allow_repeated) for Vsub in self.V}
 
     def setup_block(self, Vrow, Vcol):
-        # Preallocate the auxiliary sparse operator
+        """Preallocate the auxiliary sparse operator."""
         sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol))
         rmap = self.assembly_lgmaps[Vrow]
         cmap = self.assembly_lgmaps[Vcol]
         on_diag = Vrow == Vcol
         ptype = self.mat_type if on_diag else PETSc.Mat.Type.AIJ
 
-        preallocator = PETSc.Mat().create(comm=self.comm)
-        preallocator.setType(PETSc.Mat.Type.PREALLOCATOR)
-        preallocator.setSizes(sizes)
-        preallocator.setISAllowRepeated(self.allow_repeated)
-        preallocator.setLGMap(rmap, cmap)
-        preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False)
-        if ptype.endswith("sbaij"):
-            preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
-        preallocator.setUp()
+        preallocator = get_preallocator(self.comm, sizes, rmap, cmap, mat_type=ptype)
         self.set_values(preallocator, Vrow, Vcol)
         preallocator.assemble()
-        dnz, onz = get_preallocation(preallocator, sizes[0][0])
-        if on_diag:
-            numpy.maximum(dnz, 1, out=dnz)
+        P = allocate_matrix(preallocator, ptype, on_diag=on_diag, allow_repeated=self.allow_repeated)
         preallocator.destroy()
-        P = PETSc.Mat().create(comm=self.comm)
-        P.setType(ptype)
-        P.setSizes(sizes)
-        P.setISAllowRepeated(self.allow_repeated)
-        P.setLGMap(rmap, cmap)
-        if on_diag and ptype == "is" and self.allow_repeated:
-            bsize = Vrow.finat_element.space_dimension() * Vrow.value_size
+
+        if on_diag and P.type == "is" and self.allow_repeated:
+            bsize = Vrow.block_size * Vrow.finat_element.space_dimension()
             local_mat = P.getISLocalMat()
             nblocks = local_mat.getSize()[0] // bsize
-            local_mat.setVariableBlockSizes([bsize] * nblocks)
-        P.setPreallocationNNZ((dnz, onz))
-
-        if not (ptype.endswith("sbaij") or ptype == "is"):
-            P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
-        P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
-        P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag)
-        P.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, True)
-        P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True)
-        if ptype.endswith("sbaij"):
-            P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
-        P.setUp()
+            sizes = numpy.full((nblocks,), bsize, dtype=PETSc.IntType)
+            local_mat.setVariableBlockSizes(sizes)
         return P
 
     @PETSc.Log.EventDecorator("FDMSetValues")
@@ -1646,7 +1586,7 @@ def diff_blocks(tdim, formdegree, A00, A11, A10):
             A_blocks = [[A00.kron(A10)], [A10.kron(A00)]]
         elif formdegree == 1:
             A_blocks = [[A10.kron(A11), A11.kron(A10)]]
-            A_blocks[-1][-1].scale(-1)
+            A_blocks[0][0].scale(-1)
     elif tdim == 3:
         if formdegree == 0:
             A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]]
@@ -1660,13 +1600,94 @@ def diff_blocks(tdim, formdegree, A00, A11, A10):
     return A_blocks
 
 
-def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None):
-    """
-    Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix.
-    Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends.
+def broken_function(V, val):
+    """Return a Function(V, val=val) interpolated onto the broken space."""
+    W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), "broken"))
+    w = Function(W, dtype=val.dtype)
+    v = Function(V, val=val)
+    domain = "{[i]: 0 <= i < v.dofs}"
+    instructions = """
+    for i
+        w[i] = v[i]
+    end
     """
+    par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)})
+    return w
+
+
+def mask_local_indices(V, lgmap, allow_repeated):
+    """Return a numpy array with the masked local indices."""
+    mask = lgmap.indices
+    if allow_repeated:
+        w = broken_function(V, mask)
+        V = w.function_space()
+        mask = w.dat.data_ro_with_halos
+
+    indices = numpy.arange(mask.size, dtype=PETSc.IntType)
+    indices[mask == -1] = -1
+    indices_dat = V.make_dat(val=indices)
+    indices_acc = indices_dat(op2.READ, V.cell_node_map())
+    return indices_acc
+
+
+def unghosted_lgmap(V, lgmap, allow_repeated):
+    """Construct the local to global mapping for MatIS assembly."""
+    if allow_repeated:
+        indices = broken_function(V, lgmap.indices).dat.data_ro
+    else:
+        indices = lgmap.indices.copy()
+        local_indices = numpy.arange(indices.size, dtype=PETSc.IntType)
+        cell_node_map = broken_function(V, local_indices).dat.data_ro
+        ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map), assume_unique=True)
+        indices[ghost] = -1
+    return PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm())
+
+
+def get_preallocator(comm, sizes, rmap, cmap, mat_type=None):
+    """Set up a matrix preallocator."""
+    preallocator = PETSc.Mat().create(comm=comm)
+    preallocator.setType(PETSc.Mat.Type.PREALLOCATOR)
+    preallocator.setSizes(sizes)
+    preallocator.setLGMap(rmap, cmap)
+    preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False)
+    if mat_type is not None and mat_type.endswith("sbaij"):
+        preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
+    preallocator.setUp()
+    return preallocator
+
+
+def allocate_matrix(preallocator, mat_type, on_diag=False, allow_repeated=False):
+    """Set up a matrix from a preallocator."""
+    sizes = preallocator.getSizes()
+    nnz = get_preallocation(preallocator, sizes[0][0])
+    if on_diag:
+        numpy.maximum(nnz[0], 1, out=nnz[0])
+
+    A = PETSc.Mat().create(comm=preallocator.getComm())
+    A.setType(mat_type)
+    A.setSizes(sizes)
+    A.setBlockSize(preallocator.getBlockSize())
+    A.setISAllowRepeated(allow_repeated)
+    A.setLGMap(*preallocator.getLGMap())
+    A.setPreallocationNNZ(nnz)
+    if mat_type.endswith("sbaij"):
+        A.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True)
+    if not (mat_type.endswith("sbaij") or mat_type == "is"):
+        A.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
+    A.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
+    A.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag)
+    A.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, on_diag)
+    A.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True)
+    A.setUp()
+    return A
+
+
+def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None, mat_type="aij", allow_repeated=False):
+    """Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix.
+       Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends."""
     if comm is None:
         comm = Vf.comm
+
     ec = Vc.finat_element
     ef = Vf.finat_element
     if ef.formdegree - ec.formdegree != 1:
@@ -1717,29 +1738,30 @@ def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None):
         temp.destroy()
         eye.destroy()
 
-    sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc))
-    preallocator = PETSc.Mat().create(comm=comm)
-    preallocator.setType(PETSc.Mat.Type.PREALLOCATOR)
-    preallocator.setSizes(sizes)
-    preallocator.setUp()
-
-    kernel = ElementKernel(Dhat, name="exterior_derivative").kernel()
-    indices = tuple(op2.Dat(V.dof_dset, V.local_to_global_map(bcs).indices)(op2.READ, V.cell_node_map())
-                    for V, bcs in zip((Vf, Vc), (fbcs, cbcs)))
-    assembler = op2.ParLoop(kernel,
+    if mat_type != "is":
+        allow_repeated = False
+    spaces = (Vf, Vc)
+    bcs = (fbcs, cbcs)
+    lgmaps = tuple(V.local_to_global_map(bcs) for V, bcs in zip(spaces, bcs))
+    indices_acc = tuple(mask_local_indices(V, lgmap, allow_repeated) for V, lgmap in zip(spaces, lgmaps))
+    if mat_type == "is":
+        lgmaps = tuple(unghosted_lgmap(V, lgmap, allow_repeated) for V, lgmap in zip(spaces, lgmaps))
+
+    sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in spaces)
+    preallocator = get_preallocator(comm, sizes, *lgmaps)
+
+    kernel = ElementKernel(Dhat, name="exterior_derivative")
+    assembler = op2.ParLoop(kernel.kernel(mat_type=mat_type),
                             Vc.mesh().cell_set,
-                            *(op2.PassthroughArg(op2.OpaqueType("Mat"), m.handle) for m in (preallocator, Dhat)),
-                            *indices)
+                            *kernel.make_args(preallocator),
+                            *indices_acc)
     assembler()
     preallocator.assemble()
-    nnz = get_preallocation(preallocator, sizes[0][0])
-    preallocator.destroy()
 
-    Dmat = PETSc.Mat().createAIJ(sizes, Vf.block_size, nnz=nnz, comm=comm)
-    Dmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
+    Dmat = allocate_matrix(preallocator, mat_type, allow_repeated=allow_repeated)
     assembler.arguments[0].data = Dmat.handle
+    preallocator.destroy()
     assembler()
-
     Dmat.assemble()
     Dhat.destroy()
     return Dmat
diff --git a/pyop2/parloop.py b/pyop2/parloop.py
index c70f4c9fb7..6d208570b1 100644
--- a/pyop2/parloop.py
+++ b/pyop2/parloop.py
@@ -296,7 +296,8 @@ def replace_lgmaps(self):
                     olgmaps = []
                     for m, lgmaps in zip(pl_arg.data, pl_arg.lgmaps):
                         olgmaps.append(m.handle.getLGMap())
-                        m.handle.setLGMap(*lgmaps)
+                        if m.handle.type != "is":
+                            m.handle.setLGMap(*lgmaps)
                     orig_lgmaps.append(olgmaps)
         return tuple(orig_lgmaps)
 
@@ -309,7 +310,8 @@ def restore_lgmaps(self, orig_lgmaps):
         for arg, d in reversed(list(zip(self.global_kernel.arguments, self.arguments))):
             if isinstance(arg, (MatKernelArg, MixedMatKernelArg)) and d.lgmaps is not None:
                 for m, lgmaps in zip(d.data, orig_lgmaps.pop()):
-                    m.handle.setLGMap(*lgmaps)
+                    if m.handle.type != "is":
+                        m.handle.setLGMap(*lgmaps)
 
     @cached_property
     def _has_mats(self):
diff --git a/pyop2/types/mat.py b/pyop2/types/mat.py
index d0fb9e2404..ec54e15414 100644
--- a/pyop2/types/mat.py
+++ b/pyop2/types/mat.py
@@ -58,6 +58,7 @@ def __init__(self, dsets, maps_and_regions, name=None, nest=None, block_sparse=N
         if self._initialized:
             return
         self._dsets = dsets
+        self._lgmaps = None
         self._maps_and_regions = maps_and_regions
         self._block_sparse = block_sparse
         self._diagonal_block = diagonal_block
@@ -560,6 +561,7 @@ class Mat(AbstractMat):
 
     def __init__(self, *args, **kwargs):
         self.mat_type = kwargs.pop("mat_type", None)
+        self.sub_mat_type = kwargs.pop("sub_mat_type", None)
         super().__init__(*args, **kwargs)
         self._init()
         self.assembly_state = Mat.ASSEMBLED
@@ -618,11 +620,16 @@ def _init_monolithic(self):
         rset, cset = self.sparsity.dsets
         rlgmap = rset.unblocked_lgmap
         clgmap = cset.unblocked_lgmap
-        mat.createAIJ(size=((self.nrows, None), (self.ncols, None)),
-                      nnz=(self.sparsity.nnz, self.sparsity.onnz),
-                      bsize=1,
-                      comm=self.comm)
+        if self.mat_type == "is":
+            # FIXME monolithic lgmaps
+            rlgmap, clgmap = self.sparsity._lgmaps
+            create = mat.createIS
+        else:
+            create = mat.createAIJ
+        size = ((self.nrows, None), (self.ncols, None))
+        create(size, bsize=1, comm=self.comm)
         mat.setLGMap(rmap=rlgmap, cmap=clgmap)
+        mat.setPreallocationNNZ((self.sparsity.nnz, self.sparsity.onnz))
         self.handle = mat
         self._blocks = []
         rows, cols = self.sparsity.shape
@@ -635,7 +642,8 @@ def _init_monolithic(self):
         mat.setOption(mat.Option.KEEP_NONZERO_PATTERN, True)
         # We completely fill the allocated matrix when zeroing the
         # entries, so raise an error if we "missed" one.
-        mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
+        if self.mat_type != "is":
+            mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
         mat.setOption(mat.Option.IGNORE_OFF_PROC_ENTRIES, False)
         mat.setOption(mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
         # The first assembly (filling with zeros) sets all possible entries.
@@ -663,8 +671,10 @@ def _init_nest(self):
         for i in range(rows):
             row = []
             for j in range(cols):
+                # Only set sub_mat_type on the diagonal blocks
                 row.append(Mat(self.sparsity[i, j], self.dtype,
-                           '_'.join([self.name, str(i), str(j)])))
+                               '_'.join([self.name, str(i), str(j)]),
+                               mat_type=self.sub_mat_type if i == j else None))
             self._blocks.append(row)
         # PETSc Mat.createNest wants a flattened list of Mats
         mat.createNest([[m.handle for m in row_] for row_ in self._blocks],
@@ -685,7 +695,11 @@ def _init_block(self):
         col_lg = cset.lgmap
         rdim, cdim = self.dims[0][0]
 
-        if rdim == cdim and rdim > 1 and self.sparsity._block_sparse:
+        if self.mat_type == "is":
+            row_lg, col_lg = self.sparsity._lgmaps
+            block_sparse = False
+            create = mat.createIS
+        elif rdim == cdim and rdim > 1 and self.sparsity._block_sparse:
             # Size is total number of rows and columns, but the
             # /sparsity/ is the block sparsity.
             block_sparse = True
@@ -695,12 +709,11 @@ def _init_block(self):
             # the /dof/ sparsity.
             block_sparse = False
             create = mat.createAIJ
-        create(size=((self.nrows, None),
-                     (self.ncols, None)),
-               nnz=(self.sparsity.nnz, self.sparsity.onnz),
-               bsize=(rdim, cdim),
-               comm=self.comm)
+        size = ((self.nrows, None), (self.ncols, None))
+        create(size, bsize=(rdim, cdim), comm=self.comm)
+
         mat.setLGMap(rmap=row_lg, cmap=col_lg)
+        mat.setPreallocationNNZ((self.sparsity.nnz, self.sparsity.onnz))
         # Stash entries destined for other processors
         mat.setOption(mat.Option.IGNORE_OFF_PROC_ENTRIES, False)
         # Any add or insertion that would generate a new entry that has not
@@ -716,7 +729,8 @@ def _init_block(self):
         mat.setOption(mat.Option.KEEP_NONZERO_PATTERN, True)
         # We completely fill the allocated matrix when zeroing the
         # entries, so raise an error if we "missed" one.
-        mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
+        if self.mat_type != "is":
+            mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)
         # Put zeros in all the places we might eventually put a value.
         with profiling.timed_region("MatZeroInitial"):
             sparsity.fill_with_zeros(mat, self.sparsity.dims[0][0],
@@ -783,14 +797,21 @@ def zero(self):
         self.handle.zeroEntries()
 
     @mpi.collective
-    def zero_rows(self, rows, diag_val=1.0):
+    def zero_rows(self, rows, diag_val=1.0, idx=None):
         """Zeroes the specified rows of the matrix, with the exception of the
         diagonal entry, which is set to diag_val. May be used for applying
         strong boundary conditions.
 
         :param rows: a :class:`Subset` or an iterable"""
-        self.assemble()
         rows = rows.indices if isinstance(rows, Subset) else rows
+        rows = np.asarray(rows, dtype=dtypes.IntType)
+        rbs, _ = self.dims[0][0]
+        if rbs > 1:
+            if idx is not None:
+                rows = rbs * rows + idx
+            else:
+                rows = np.dstack([rbs*rows + i for i in range(rbs)]).flatten()
+        self.assemble()
         self.handle.zeroRowsLocal(rows, diag_val)
 
     def _flush_assembly(self):
@@ -814,11 +835,19 @@ def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None):
             else:
                 rows = np.dstack([rbs*rows + i for i in range(rbs)]).flatten()
         rows = rows.reshape(-1, 1)
-        self.change_assembly_state(Mat.INSERT_VALUES)
-        if len(rows) > 0:
-            values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType)
-            self.handle.setValuesLocalRCV(rows, rows, values,
-                                          addv=PETSc.InsertMode.INSERT_VALUES)
+        if self.handle.type == "is":
+            self.handle.assemble()
+            # PETSc does not properly handle local dofs that map
+            # to a negative global index
+            rmap, _ = self.handle.getLGMap()
+            rows = rows[rmap.apply(rows) > -1]
+            self.handle.zeroRowsColumnsLocal(rows, diag_val)
+        else:
+            self.change_assembly_state(Mat.INSERT_VALUES)
+            if len(rows) > 0:
+                values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType)
+                self.handle.setValuesLocalRCV(rows, rows, values,
+                                              addv=PETSc.InsertMode.INSERT_VALUES)
 
     @mpi.collective
     def assemble(self):
@@ -930,11 +959,19 @@ def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None):
             else:
                 rows = np.dstack([rbs*rows + i for i in range(rbs)]).flatten()
         rows = rows.reshape(-1, 1)
-        self.change_assembly_state(Mat.INSERT_VALUES)
-        if len(rows) > 0:
-            values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType)
-            self.handle.setValuesLocalRCV(rows, rows, values,
-                                          addv=PETSc.InsertMode.INSERT_VALUES)
+        if self.handle.type == "is":
+            self.handle.assemble()
+            # PETSc does not properly handle local dofs that map
+            # to a negative global index
+            rmap, _ = self.handle.getLGMap()
+            rows = rows[rmap.apply(rows) > -1]
+            self.handle.zeroRowsColumnsLocal(rows, diag_val)
+        else:
+            self.change_assembly_state(Mat.INSERT_VALUES)
+            if len(rows) > 0:
+                values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType)
+                self.handle.setValuesLocalRCV(rows, rows, values,
+                                              addv=PETSc.InsertMode.INSERT_VALUES)
 
     def addto_values(self, rows, cols, values):
         """Add a block of values to the :class:`Mat`."""
diff --git a/tests/firedrake/regression/test_assemble.py b/tests/firedrake/regression/test_assemble.py
index 94e3439c6e..43d823d883 100644
--- a/tests/firedrake/regression/test_assemble.py
+++ b/tests/firedrake/regression/test_assemble.py
@@ -115,6 +115,62 @@ def test_mat_nest_real_block_assembler_correctly_reuses_tensor(mesh):
     assert A2.M is A1.M
 
 
+@pytest.mark.parallel
+@pytest.mark.parametrize("shape,mat_type", [("scalar", "is"), ("vector", "is"), ("mixed", "is"), ("mixed", "nest")])
+@pytest.mark.parametrize("dirichlet_bcs", [False, True])
+def test_assemble_matis(mesh, shape, mat_type, dirichlet_bcs):
+    if shape == "vector":
+        V = VectorFunctionSpace(mesh, "CG", 1)
+    else:
+        V = FunctionSpace(mesh, "CG", 1)
+        if shape == "mixed":
+            V = V * V
+    if V.value_size == 1:
+        A = 1
+    else:
+        A = as_matrix([[2, -1], [-1, 2]])
+
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    a = inner(A * grad(u), grad(v))*dx
+    if dirichlet_bcs:
+        bcs = [DirichletBC(V.sub(i), 0, (i % 4+1, (i+2) % 4+1)) for i in range(V.value_size)]
+    else:
+        bcs = None
+
+    ais = assemble(a, bcs=bcs, mat_type=mat_type, sub_mat_type="is").petscmat
+
+    aij = PETSc.Mat()
+    if ais.type == "nest":
+        blocks = []
+        for i in range(len(V)):
+            row = []
+            for j in range(len(V)):
+                bis = ais.getNestSubMatrix(i, j)
+                if i == j:
+                    assert bis.type == "is"
+                    bij = PETSc.Mat()
+                    bis.convert("aij", bij)
+                else:
+                    bij = bis
+                row.append(bij)
+            blocks.append(row)
+        anest = PETSc.Mat()
+        anest.createNest(blocks,
+                         isrows=V.dof_dset.field_ises,
+                         iscols=V.dof_dset.field_ises,
+                         comm=ais.comm)
+        anest.convert("aij", aij)
+    else:
+        assert ais.type == "is"
+        ais.convert("aij", aij)
+
+    aij_ref = assemble(a, bcs=bcs, mat_type="aij").petscmat
+    aij_ref.axpy(-1, aij)
+    ind, iptr, values = aij_ref.getValuesCSR()
+    assert np.allclose(values, 0)
+
+
 def test_assemble_diagonal(mesh):
     V = FunctionSpace(mesh, "P", 3)
     u = TrialFunction(V)
diff --git a/tests/firedrake/regression/test_bddc.py b/tests/firedrake/regression/test_bddc.py
index f1f1630e2d..293e992f39 100644
--- a/tests/firedrake/regression/test_bddc.py
+++ b/tests/firedrake/regression/test_bddc.py
@@ -3,13 +3,14 @@
 from firedrake.petsc import DEFAULT_DIRECT_SOLVER
 
 
-def bddc_params(static_condensation):
+def bddc_params():
     chol = {
         "pc_type": "cholesky",
         "pc_factor_mat_solver_type": "petsc",
         "pc_factor_mat_ordering_type": "natural",
     }
     sp = {
+        "mat_type": "is",
         "pc_type": "python",
         "pc_python_type": "firedrake.BDDCPC",
         "bddc_pc_bddc_neumann": chol,
@@ -19,12 +20,14 @@ def bddc_params(static_condensation):
     return sp
 
 
-def solver_parameters(static_condensation=True):
+def solver_parameters(static_condensation=False, variant=None):
     rtol = 1E-8
     atol = 1E-12
-    sp_bddc = bddc_params(static_condensation)
-    repeated = True
-    if static_condensation:
+    sp_bddc = bddc_params()
+    if variant != "fdm":
+        sp = sp_bddc
+
+    elif static_condensation:
         sp = {
             "pc_type": "python",
             "pc_python_type": "firedrake.FacetSplitPC",
@@ -33,7 +36,7 @@ def solver_parameters(static_condensation=True):
             "facet_fdm_static_condensation": True,
             "facet_fdm_pc_use_amat": False,
             "facet_fdm_mat_type": "is",
-            "facet_fdm_mat_is_allow_repeated": repeated,
+            "facet_fdm_mat_is_allow_repeated": True,
             "facet_fdm_pc_type": "fieldsplit",
             "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative",
             "facet_fdm_pc_fieldsplit_diag_use_amat": False,
@@ -47,22 +50,23 @@ def solver_parameters(static_condensation=True):
             "pc_type": "python",
             "pc_python_type": "firedrake.FDMPC",
             "fdm_pc_use_amat": False,
-            "fdm_mat_type": "is",
-            "fdm_mat_is_allow_repeated": repeated,
+            "fdm_mat_is_allow_repeated": True,
             "fdm": sp_bddc,
         }
+
     sp.update({
-        "mat_type": "matfree",
         "ksp_type": "cg",
         "ksp_norm_type": "natural",
         "ksp_monitor": None,
         "ksp_rtol": rtol,
         "ksp_atol": atol,
     })
+    if variant == "fdm":
+        sp["mat_type"] = "matfree"
     return sp
 
 
-def solve_riesz_map(mesh, family, degree, bcs, condense):
+def solve_riesz_map(mesh, family, degree, variant, bcs, condense=False, vector=False):
     dirichlet_ids = []
     if bcs:
         dirichlet_ids = ["on_boundary"]
@@ -74,7 +78,10 @@ def solve_riesz_map(mesh, family, degree, bcs, condense):
         family = "RTCE" if tdim == 2 else "NCE"
     if family.endswith("F"):
         family = "RTCF" if tdim == 2 else "NCF"
-    V = FunctionSpace(mesh, family, degree, variant="fdm")
+
+    fs = VectorFunctionSpace if vector else FunctionSpace
+
+    V = fs(mesh, family, degree, variant=variant)
     v = TestFunction(V)
     u = TrialFunction(V)
     d = {
@@ -95,13 +102,21 @@ def solve_riesz_map(mesh, family, degree, bcs, condense):
     bcs = [DirichletBC(V, u_exact, sub) for sub in dirichlet_ids]
     nsp = None
     if formdegree == 0:
-        nsp = VectorSpaceBasis([Function(V).interpolate(Constant(1))])
+        b = np.zeros(V.value_shape)
+        expr = Constant(b)
+        basis = []
+        for i in np.ndindex(V.value_shape):
+            b[...] = 0
+            b[i] = 1
+            expr.assign(b)
+            basis.append(Function(V).interpolate(expr))
+        nsp = VectorSpaceBasis(basis)
         nsp.orthonormalize()
 
     uh = Function(V, name="solution")
     problem = LinearVariationalProblem(a, L, uh, bcs=bcs)
 
-    sp = solver_parameters(condense)
+    sp = solver_parameters(condense, variant=variant)
     solver = LinearVariationalSolver(problem, near_nullspace=nsp,
                                      solver_parameters=sp,
                                      options_prefix="")
@@ -120,11 +135,34 @@ def mesh(request):
 
 
 @pytest.mark.parallel
-@pytest.mark.parametrize("family", "Q")
+@pytest.mark.parametrize("degree", range(1, 3))
+@pytest.mark.parametrize("variant", ("spectral", "fdm"))
+def test_vertex_dofs(mesh, variant, degree):
+    from firedrake.preconditioners.bddc import get_restricted_dofs
+    P1 = FunctionSpace(mesh, "Lagrange", 1, variant=variant)
+    V0 = FunctionSpace(mesh, "Lagrange", degree, variant=variant)
+    v = get_restricted_dofs(V0, "vertex")
+    assert v.getSizes() == P1.dof_dset.layout_vec.getSizes()
+
+
+@pytest.mark.parallel
 @pytest.mark.parametrize("degree", (4,))
-@pytest.mark.parametrize("condense", (False, True))
-def test_bddc_fdm(mesh, family, degree, condense):
+@pytest.mark.parametrize("family", "Q")
+def test_bddc_fdm(mesh, family, degree):
+    variant = "fdm"
     bcs = True
     tdim = mesh.topological_dimension()
     expected = 6 if tdim == 2 else 11
-    assert solve_riesz_map(mesh, family, degree, bcs, condense) <= expected
+    assert solve_riesz_map(mesh, family, degree, variant, bcs) <= expected
+
+
+@pytest.mark.parallel
+@pytest.mark.parametrize("degree", (4,))
+@pytest.mark.parametrize("family", "Q")
+@pytest.mark.parametrize("vector", (False, True), ids=("scalar", "vector"))
+def test_bddc_aij(mesh, family, degree, vector):
+    variant = None
+    bcs = True
+    tdim = mesh.topological_dimension()
+    expected = 7 if tdim == 2 else 11
+    assert solve_riesz_map(mesh, family, degree, variant, bcs, vector=vector) <= expected
diff --git a/tests/firedrake/regression/test_fdm.py b/tests/firedrake/regression/test_fdm.py
index 947ae43fbc..50aa3e7df3 100644
--- a/tests/firedrake/regression/test_fdm.py
+++ b/tests/firedrake/regression/test_fdm.py
@@ -1,4 +1,5 @@
 import pytest
+import numpy
 from firedrake import *
 from pyop2.utils import as_tuple
 from firedrake.petsc import DEFAULT_DIRECT_SOLVER
@@ -334,3 +335,73 @@ def test_ipdg_direct_solver(fs):
             assert uvec.norm() < 1E-8
     else:
         assert norm(u_exact-uh, "H1") < 1.0E-8
+
+
+@pytest.mark.parallel(nprocs=2)
+@pytest.mark.parametrize("mat_type", ("aij",))
+@pytest.mark.parametrize("variant,degree", [("spectral", 1), ("spectral", 4), ("integral", 4), ("fdm", 4)])
+def test_tabulate_gradient(mesh, variant, degree, mat_type):
+    from firedrake.preconditioners.fdm import tabulate_exterior_derivative
+    tdim = mesh.topological_dimension()
+    family = {1: "DG", 2: "RTCE", 3: "NCE"}[tdim]
+
+    V0 = FunctionSpace(mesh, "Lagrange", degree, variant=variant)
+    V1 = FunctionSpace(mesh, family, degree-(tdim == 1), variant=variant)
+    D = tabulate_exterior_derivative(V0, V1, mat_type=mat_type)
+
+    M = assemble(inner(TrialFunction(V1), TestFunction(V1))*dx).petscmat
+    Dij = D if D.type.endswith("aij") else D.convert(M.type, PETSc.Mat())
+    B = M.matMult(Dij)
+
+    Bref = assemble(inner(grad(TrialFunction(V0)), TestFunction(V1))*dx).petscmat
+    Bref.axpy(-1, B)
+    _, _, vals = Bref.getValuesCSR()
+    assert numpy.allclose(vals, 0)
+
+
+@pytest.mark.parallel(nprocs=2)
+@pytest.mark.parametrize("mat_type", ("aij",))
+@pytest.mark.parametrize("variant,degree", [("spectral", 1), ("spectral", 4), ("integral", 4), ("fdm", 4)])
+def test_tabulate_curl(mesh, variant, degree, mat_type):
+    from firedrake.preconditioners.fdm import tabulate_exterior_derivative
+    tdim = mesh.topological_dimension()
+    family1 = {1: "CG", 2: "CG", 3: "NCE"}[tdim]
+    family2 = {1: "DG", 2: "RTCF", 3: "NCF"}[tdim]
+
+    V1 = FunctionSpace(mesh, family1, degree, variant=variant)
+    V2 = FunctionSpace(mesh, family2, degree-(tdim == 1), variant=variant)
+    D = tabulate_exterior_derivative(V1, V2, mat_type=mat_type)
+
+    M = assemble((-1)**(tdim-1)*inner(TrialFunction(V2), TestFunction(V2))*dx).petscmat
+    Dij = D if D.type.endswith("aij") else D.convert(M.type, PETSc.Mat())
+    B = M.matMult(Dij)
+
+    Bref = assemble(inner(curl(TrialFunction(V1)), TestFunction(V2))*dx).petscmat
+    Bref.axpy(-1, B)
+    _, _, vals = Bref.getValuesCSR()
+    assert numpy.allclose(vals, 0)
+
+
+@pytest.mark.parallel(nprocs=2)
+@pytest.mark.parametrize("mat_type", ("aij", "is"))
+@pytest.mark.parametrize("variant,degree", [("spectral", 1), ("spectral", 4), ("integral", 4), ("fdm", 4)])
+def test_tabulate_divergence(mesh, variant, degree, mat_type):
+    from firedrake.preconditioners.fdm import tabulate_exterior_derivative
+    tdim = mesh.topological_dimension()
+    family = {1: "CG", 2: "RTCF", 3: "NCF"}[tdim]
+
+    V = FunctionSpace(mesh, family, degree, variant=variant)
+    Q = FunctionSpace(mesh, "DG", 0, variant=f"integral({degree-1})")
+    D = tabulate_exterior_derivative(V, Q, mat_type=mat_type, allow_repeated=True)
+
+    # Fix scale
+    Jdet = JacobianDeterminant(mesh)
+    M = assemble(inner(TrialFunction(Q)*(1/Jdet), TestFunction(Q))*dx, diagonal=True)
+    with M.dat.vec as mvec:
+        D.diagonalScale(mvec, None)
+
+    Dref = assemble(inner(div(TrialFunction(V)), TestFunction(Q))*dx).petscmat
+    Dij = D if D.type.endswith("aij") else D.convert(Dref.type, PETSc.Mat())
+    Dref.axpy(-1, Dij)
+    _, _, vals = Dref.getValuesCSR()
+    assert numpy.allclose(vals, 0)