diff --git a/CMakeLists.txt b/CMakeLists.txt index ef340ea4f..d489c0474 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,9 @@ else() message(STATUS "ADIOS2 is disabled") endif() +set(OMP_TGT OFF CACHE BOOL + "Enable OpenMP target offloading.") + set(CMAKE_Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/include) add_subdirectory(src) add_subdirectory(tests) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 91d2bccec..d00264169 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,6 @@ set(SRC solver.f90 tdsops.f90 time_integrator.f90 - ordering.f90 mesh.f90 mesh_content.f90 field.f90 @@ -45,6 +44,9 @@ set(CUDASRC set(BACKENDSRC backend/omp/backend.f90 ) +set(OMPTGTSRC # For OMP offloading + backend/omp/target/allocator.f90 + ) set(2DECOMPFFTSRC backend/omp/poisson_fft.f90 decomp/decomp_2decompfft.f90 @@ -80,6 +82,11 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR list(APPEND BACKENDSRC backend/cuda/backend.f90) endif() +if(OMP_TGT) + list(APPEND SRC ${OMPTGTSRC}) + list(APPEND BACKENDSRC backend/omp/target/backend.f90) +endif() + # Decide whether 2decomp&fft is supported by the build find_package(decomp2d) @@ -103,8 +110,8 @@ target_include_directories(x3d2_backends INTERFACE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(x3d2_backends PRIVATE x3d2) add_executable(xcompact xcompact.f90) -target_link_libraries(xcompact PRIVATE x3d2) target_link_libraries(xcompact PRIVATE x3d2_backends) +target_link_libraries(xcompact PRIVATE x3d2) # if CUDA compiler if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR @@ -122,11 +129,16 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU" OR set(CMAKE_Fortran_FLAGS "-cpp -std=f2018") set(CMAKE_Fortran_FLAGS_DEBUG "-g -Og -Wall -Wpedantic -Werror -Wimplicit-interface -Wimplicit-procedure -Wno-unused-dummy-argument") set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") + if (OMP_TGT) + # A bit of a hack - hardcoded for MI300A + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fopenmp 
--offload-arch=gfx942") + endif() elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "Cray") set(CMAKE_Fortran_FLAGS "-eF -M878") # -M878 suppresses WARNING multiple module includes (not useful) set(CMAKE_Fortran_FLAGS_DEBUG "-G2 -O0") set(CMAKE_Fortran_FLAGS_RELEASE "-O3") target_link_options(x3d2 INTERFACE -h omp) + target_link_options(x3d2_backends INTERFACE -h omp) endif() if(WITH_2DECOMPFFT) diff --git a/src/allocator.f90 b/src/allocator.f90 index 698beda79..0b380541c 100644 --- a/src/allocator.f90 +++ b/src/allocator.f90 @@ -149,6 +149,7 @@ function get_block(self, dir, data_loc) result(handle) ! Apply bounds remapping based on requested direction call handle%set_shape(dims) + end function get_block subroutine release_block(self, handle) diff --git a/src/backend/omp/backend.f90 b/src/backend/omp/backend.f90 index 370108a5a..a2c2fab80 100644 --- a/src/backend/omp/backend.f90 +++ b/src/backend/omp/backend.f90 @@ -71,7 +71,7 @@ function init(mesh, allocator) result(backend) call backend%base_init() select type (allocator) - type is (allocator_t) + class is (allocator_t) ! class level access to the allocator backend%allocator => allocator end select diff --git a/src/backend/omp/target/allocator.f90 b/src/backend/omp/target/allocator.f90 new file mode 100644 index 000000000..f8f66cd45 --- /dev/null +++ b/src/backend/omp/target/allocator.f90 @@ -0,0 +1,179 @@ +!!! backends/omp/target/allocator.f90 +!! +!! 
Implements an allocator specialised to OMP target offloading + +module m_omptgt_allocator + + use iso_c_binding, only: c_ptr, c_f_pointer, c_sizeof + use omp_lib, only: omp_target_alloc, omp_target_free, omp_get_default_device + + use m_common, only: dp + + use m_allocator, only: allocator_t + use m_mesh, only: mesh_t + use m_field, only: field_t + + implicit none + + private + public :: omptgt_allocator_t + public :: omptgt_field_t + + type, extends(allocator_t) :: omptgt_allocator_t + contains + procedure :: create_block => create_block_omptgt + end type omptgt_allocator_t + + interface omptgt_allocator_t + module procedure omptgt_allocator_init + end interface omptgt_allocator_t + + type, extends(field_t) :: omptgt_field_t + ! A device-resident field + integer, private :: dev_id + type(c_ptr), private :: dev_ptr + real(dp), pointer, private :: p_data_tgt(:) => null() + real(dp), pointer, contiguous :: data_tgt(:, :, :) => null() + contains + procedure :: destroy => omptgt_field_destroy + procedure :: fill => fill_omptgt + procedure :: get_shape => get_shape_omptgt + procedure :: set_shape => set_shape_omptgt + end type omptgt_field_t + + interface omptgt_field_t + module procedure omptgt_field_init + end interface omptgt_field_t + +contains + + ! Constructor for the OMP target offload allocator + type(omptgt_allocator_t) function omptgt_allocator_init(dims, sz) result(a) + integer, intent(in) :: dims(3) + integer, intent(in) :: sz + + a%allocator_t = allocator_t(dims, sz) + end function omptgt_allocator_init + + ! 
Allocates a device-resident block + function create_block_omptgt(self, next) result(ptr) + class(omptgt_allocator_t), intent(inout) :: self + class(field_t), pointer, intent(in) :: next + type(omptgt_field_t), pointer :: newblock_tgt + class(field_t), pointer :: ptr + + self%next_id = self%next_id + 1 + allocate(newblock_tgt) + newblock_tgt = omptgt_field_t(self%ngrid, next, id=self%next_id) + ptr => newblock_tgt + + end function create_block_omptgt + + ! Constructs a device-resident field + type(omptgt_field_t) function omptgt_field_init(ngrid, next, id) result(f) + integer, intent(in) :: ngrid + class(field_t), pointer, intent(in) :: next + integer, intent(in) :: id + + f%refcount = 0 + f%next => next + f%id = id + + f%dev_id = omp_get_default_device() + f%dev_ptr = omp_target_alloc(ngrid * c_sizeof(0.0_dp), f%dev_id) + call c_f_pointer(f%dev_ptr, f%p_data_tgt, shape=[ngrid]) + + end function omptgt_field_init + + subroutine omptgt_field_destroy(self) + class(omptgt_field_t) :: self + + nullify(self%data_tgt) + nullify(self%p_data_tgt) + call omp_target_free(self%dev_ptr, self%dev_id) + end subroutine + + ! Deallocates device-resident memory before deallocating the base type + subroutine destroy(self) + class(omptgt_allocator_t) :: self + + class(field_t), pointer :: ptr + + ptr => self%first + do + if (.not. 
associated(ptr)) then + exit + end if + + select type(ptr) + type is(omptgt_field_t) + call ptr%destroy() + end select + + ptr => ptr%next + end do + + call self%allocator_t%destroy() + end subroutine + + subroutine fill_omptgt(self, c) + class(omptgt_field_t) :: self + real(dp), intent(in) :: c + + !call fill_omptgt_(self%p_data_tgt, c, size(self%p_data_tgt)) + call fill_omptgt_3d_(self%data_tgt, c) + + end subroutine fill_omptgt + + subroutine fill_omptgt_(p_data_tgt, c, n) + real(dp), dimension(:), intent(inout) :: p_data_tgt + real(dp), intent(in) :: c + integer, intent(in) :: n + + integer :: i + + !$omp target teams distribute parallel do has_device_addr(p_data_tgt) + do i = 1, n + p_data_tgt(i) = c + end do + !$omp end target teams distribute parallel do + + end subroutine + + subroutine fill_omptgt_3d_(data_tgt, c) + real(dp), dimension(:, :, :), intent(inout) :: data_tgt + real(dp), intent(in) :: c + + integer, dimension(3) :: n + integer :: i, j, k + + n = shape(data_tgt) + + !$omp target teams distribute parallel do collapse(3) has_device_addr(data_tgt) + do k = 1, n(3) + do j = 1, n(2) + do i = 1, n(1) + data_tgt(i, j, k) = c + end do + end do + end do + !$omp end target teams distribute parallel do + end subroutine + + function get_shape_omptgt(self) result(dims) + class(omptgt_field_t) :: self + integer :: dims(3) + + dims = shape(self%data_tgt) + end function + + subroutine set_shape_omptgt(self, dims) + class(omptgt_field_t) :: self + integer, intent(in) :: dims(3) + + call c_f_pointer(self%dev_ptr, self%data_tgt, shape=dims) + + end subroutine + +end module m_omptgt_allocator + diff --git a/src/backend/omp/target/backend.f90 b/src/backend/omp/target/backend.f90 new file mode 100644 index 000000000..15452dd80 --- /dev/null +++ b/src/backend/omp/target/backend.f90 @@ -0,0 +1,310 @@ +!!! src/backend/omp/target/backend.f90 +!! +!! OpenMP target offload backend implementation. +!! +!! 
Note this extends the CPU (host) OpenMP backend with the intention of being able to use fallback implementations where necessary. + +module m_omptgt_backend + + use m_common, only: dp, DIR_C, get_dirs_from_rdr + + use m_allocator, only: allocator_t + use m_mesh, only: mesh_t + use m_field, only: field_t + use m_ordering, only: get_index_reordering + + use m_omp_common, only: SZ + use m_omp_backend, only: omp_backend_t + + use m_omptgt_allocator, only: omptgt_field_t + + implicit none + + type, extends(omp_backend_t) :: omptgt_backend_t + contains + procedure :: copy_f_to_data => copy_f_to_data_omptgt + procedure :: copy_data_to_f => copy_data_to_f_omptgt + procedure :: reorder => reorder_omptgt + procedure :: vecadd => vecadd_omptgt + procedure :: veccopy => veccopy_omptgt + end type + + interface omptgt_backend_t + module procedure omptgt_backend_init + end interface + + private + public :: omptgt_backend_t + +contains + + type(omptgt_backend_t) function omptgt_backend_init(mesh, allocator) result(backend) + + type(mesh_t), target, intent(inout) :: mesh + class(allocator_t), target, intent(inout) :: allocator + + backend%omp_backend_t = omp_backend_t(mesh, allocator) + end function + + subroutine veccopy_omptgt(self, dst, src) + + class(omptgt_backend_t) :: self + class(field_t), intent(inout) :: dst + class(field_t), intent(in) :: src + + if (src%dir /= dst%dir) then + error stop "Called vector copy with incompatible fields" + end if + + select type(dst) + type is (omptgt_field_t) + select type(src) + type is (omptgt_field_t) + call veccopy_offload_(dst%data_tgt, src%data_tgt) + class default + error stop "Called omptgt vector copy with unsupported source vector" + end select + class default + error stop "Called omptgt vector copy with unsupported destination vector" + end select + end subroutine + + subroutine veccopy_offload_(dst, src) + + real(dp), dimension(:, :, :), intent(inout) :: dst + real(dp), dimension(:, :, :), intent(in) :: src + + integer, 
dimension(3) :: n + integer :: i, j, k + + n = shape(dst) + + !$omp target teams distribute parallel do collapse(3) has_device_addr(dst, src) + do k = 1, n(3) + do j = 1, n(2) + do i = 1, n(1) + dst(i, j, k) = src(i, j, k) + end do + end do + end do + !$omp end target teams distribute parallel do + + end subroutine + + subroutine vecadd_omptgt(self, a, x, b, y) + + class(omptgt_backend_t) :: self + real(dp), intent(in) :: a + class(field_t), intent(in) :: x + real(dp), intent(in) :: b + class(field_t), intent(inout) :: y + + if (x%dir /= y%dir) then + error stop "Called vector add with incompatible fields" + end if + + select type(x) + type is(omptgt_field_t) + select type(y) + type is (omptgt_field_t) + call vecadd_offload(self, a, x, b, y) + class default + error stop "Device/host fallback not yet implemented" + end select + class default + call self%omp_backend_t%vecadd(a, x, b, y) + end select + + end subroutine + + subroutine vecadd_offload(self, a, x, b, y) + + class(omptgt_backend_t) :: self + real(dp), intent(in) :: a + type(omptgt_field_t), intent(in) :: x + real(dp), intent(in) :: b + type(omptgt_field_t), intent(inout) :: y + + integer, dimension(3) :: dims + + dims = self%allocator%get_padded_dims(x%dir) + + call vecadd_offload_(dims, a, x%data_tgt, b, y%data_tgt) + + end subroutine + + subroutine vecadd_offload_(dims, a, x, b, y) + integer, dimension(3), intent(in) :: dims + real(dp), intent(in) :: a + real(dp), dimension(:, :, :), intent(in) :: x + real(dp), intent(in) :: b + real(dp), dimension(:, :, :), intent(inout) :: y + + integer :: i, j, k + + !$omp target teams distribute parallel do collapse(3) has_device_addr(x, y) + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + y(i, j, k) = a * x(i, j, k) + b * y(i, j, k) + end do + end do + end do + !$omp end target teams distribute parallel do + end subroutine + + subroutine copy_data_to_f_omptgt(self, f, data) + class(omptgt_backend_t), intent(inout) :: self + class(field_t), intent(inout) 
:: f + real(dp), dimension(:, :, :), intent(in) :: data + + integer, dimension(3) :: dims + + dims = self%allocator%get_padded_dims(f%dir) + + ! XXX: This could be improved following cuda/backend.f90:resolve_field_t() + select type(f) + type is(omptgt_field_t) + call copy_data_to_f_omptgt_(f%data_tgt, data, dims) + class default + error stop "Unsupported" + end select + + end subroutine copy_data_to_f_omptgt + + subroutine copy_data_to_f_omptgt_(f_arr, d, dims) + real(dp), dimension(:, :, :), intent(inout) :: f_arr + real(dp), dimension(:, :, :), intent(in) :: d + integer, dimension(3), intent(in) :: dims + + integer :: i, j, k + + ! XXX: This could be improved following cuda/backend.f90:resolve_field_t() + !$omp target teams loop collapse(3) map(to:d) has_device_addr(f_arr) + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + f_arr(i, j, k) = d(i, j, k) + end do + end do + end do + !$omp end target teams loop + + end subroutine + + subroutine copy_f_to_data_omptgt(self, data, f) + class(omptgt_backend_t), intent(inout) :: self + real(dp), dimension(:, :, :), intent(out) :: data + class(field_t), intent(in) :: f + + integer, dimension(3) :: dims + + dims = self%allocator%get_padded_dims(f%dir) + + select type(f) + type is(omptgt_field_t) + call copy_f_to_data_omptgt_(data, f%data_tgt, dims) + class default + error stop "Unsupported" + end select + + end subroutine copy_f_to_data_omptgt + + subroutine copy_f_to_data_omptgt_(data, f_arr, dims) + real(dp), dimension(:, :, :), intent(out) :: data + real(dp), dimension(:, :, :), intent(in) :: f_arr + integer, dimension(3), intent(in) :: dims + + integer :: i, j, k + + !$omp target teams distribute parallel do collapse(3) map(from:data) has_device_addr(f_arr) + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + data(i, j, k) = f_arr(i, j, k) + end do + end do + end do + !$omp end target teams distribute parallel do + + end subroutine + + subroutine reorder_omptgt(self, u_, u, direction) + 
class(omptgt_backend_t) :: self + class(field_t), intent(inout) :: u_ + class(field_t), intent(in) :: u + integer, intent(in) :: direction + integer, dimension(3) :: dims, cart_padded + integer :: dir_from, dir_to + + dims = self%allocator%get_padded_dims(u%dir) + cart_padded = self%allocator%get_padded_dims(DIR_C) + call get_dirs_from_rdr(dir_from, dir_to, direction) + + ! XXX: This could be improved following cuda/backend.f90:resolve_field_t() + select type(u_) + type is(omptgt_field_t) + select type(u) + type is(omptgt_field_t) + call reorder_omptgt_dd(u_%data_tgt, u%data_tgt, dims, dir_from, dir_to, cart_padded) + class default + call reorder_omptgt_dh(u_%data_tgt, u%data, dims, dir_from, dir_to, cart_padded) + end select + class default + error stop "Unsupported" + end select + + ! reorder keeps the data_loc the same + call u_%set_data_loc(u%data_loc) + + end subroutine reorder_omptgt + + subroutine reorder_omptgt_dd(u_, u, dims, dir_from, dir_to, cart_padded) + real(dp), dimension(:, :, :), pointer :: u_ + real(dp), dimension(:, :, :), pointer, intent(in) :: u + integer, dimension(3), intent(in) :: dims + integer, intent(in) :: dir_from, dir_to + integer, dimension(3), intent(in) :: cart_padded + + integer :: i, j, k + integer :: out_i, out_j, out_k + + !$omp target teams distribute parallel do private(out_i, out_j, out_k) collapse(3) has_device_addr(u_, u) + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + call get_index_reordering(out_i, out_j, out_k, i, j, k, & + dir_from, dir_to, SZ, cart_padded) + u_(out_i, out_j, out_k) = u(i, j, k) + end do + end do + end do + !$omp end target teams distribute parallel do + + end subroutine + + subroutine reorder_omptgt_dh(u_, u, dims, dir_from, dir_to, cart_padded) + real(dp), dimension(:, :, :), pointer :: u_ + real(dp), dimension(:, :, :), pointer, intent(in) :: u + integer, dimension(3), intent(in) :: dims + integer, intent(in) :: dir_from, dir_to + integer, dimension(3), intent(in) :: cart_padded + + 
integer :: i, j, k + integer :: out_i, out_j, out_k + + !$omp target teams distribute parallel do private(out_i, out_j, out_k) collapse(3) map(to:u) has_device_addr(u_) + do k = 1, dims(3) + do j = 1, dims(2) + do i = 1, dims(1) + call get_index_reordering(out_i, out_j, out_k, i, j, k, & + dir_from, dir_to, SZ, cart_padded) + u_(out_i, out_j, out_k) = u(i, j, k) + end do + end do + end do + !$omp end target teams distribute parallel do + + end subroutine + +end module diff --git a/src/io/checkpoint_manager.f90 b/src/io/checkpoint_manager.f90 index 9ae0751c9..0d85a14cc 100644 --- a/src/io/checkpoint_manager.f90 +++ b/src/io/checkpoint_manager.f90 @@ -1,20 +1,20 @@ module m_checkpoint_manager -! @brief Manages the creation and restoration of simulation checkpoints -!! for restart capabilities. -!! -!! @details This module is responsible for periodically saving the full, unstrided -!! simulation state to a file. This allows a simulation to be stopped and resumed -!! from the exact state it was in. -!! -!! Key features include: -!! - Reading all checkpoint settings from a configuration file -!! - Periodically writing the full-resolution simulation state -!! - Handling the full logic for restarting a simulation from -!! a specified checkpoint file. -!! - A safe-write strategy that writes to a temporary file first, -!! then atomically renames it to the final filename to -!! prevent corrupted checkpoints. -!! - Optional cleanup of old checkpoint files to conserve disk space. + ! @brief Manages the creation and restoration of simulation checkpoints + !! for restart capabilities. + !! + !! @details This module is responsible for periodically saving the full, unstrided + !! simulation state to a file. This allows a simulation to be stopped and resumed + !! from the exact state it was in. + !! + !! Key features include: + !! - Reading all checkpoint settings from a configuration file + !! - Periodically writing the full-resolution simulation state + !! 
- Handling the full logic for restarting a simulation from + !! a specified checkpoint file. + !! - A safe-write strategy that writes to a temporary file first, + !! then atomically renames it to the final filename to + !! prevent corrupted checkpoints. + !! - Optional cleanup of old checkpoint files to conserve disk space. use mpi, only: MPI_COMM_WORLD, MPI_Comm_rank, MPI_Abort use m_common, only: dp, i8, DIR_C, get_argument use m_field, only: field_t diff --git a/src/ordering.f90 b/src/ordering.f90 index 19be0a583..06520db85 100644 --- a/src/ordering.f90 +++ b/src/ordering.f90 @@ -12,6 +12,7 @@ module m_ordering pure subroutine get_index_ijk(i, j, k, dir_i, dir_j, dir_k, dir, & SZ, nx_padded, ny_padded, nz_padded) + !$omp declare target !! Get cartesian index from application storage directional one integer, intent(out) :: i, j, k ! cartesian indices integer, intent(in) :: dir_i, dir_j, dir_k ! application storage indices @@ -41,6 +42,7 @@ end subroutine get_index_ijk pure subroutine get_index_dir(dir_i, dir_j, dir_k, i, j, k, dir, & SZ, nx_padded, ny_padded, nz_padded) + !$omp declare target !! Get application storage directional index from cartesian index integer, intent(out) :: dir_i, dir_j, dir_k ! application storage indices integer, intent(in) :: i, j, k ! cartesian indices @@ -71,6 +73,7 @@ end subroutine get_index_dir pure subroutine get_index_reordering( & out_i, out_j, out_k, in_i, in_j, in_k, dir_from, dir_to, sz, cart_padded & ) + !$omp declare target !! Converts indices in between any two DIR_? integer, intent(out) :: out_i, out_j, out_k ! output indices integer, intent(in) :: in_i, in_j, in_k ! 
input indices diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bbd291cbd..e288f265d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -25,9 +25,18 @@ function(define_test testfile np backend) endif() find_package(OpenMP REQUIRED) target_link_libraries(${test_name} PRIVATE OpenMP::OpenMP_Fortran) + + if(${backend} STREQUAL omp_tgt) + # Note this is somewhat of a hack - hardcoded to build against MI300A + target_compile_options(${test_name} PRIVATE "-fopenmp") + target_compile_options(${test_name} PRIVATE "--offload-arch=gfx942") + target_compile_options(${test_name} PRIVATE "-DOMP_TGT") + target_link_options(${test_name} PRIVATE "-fopenmp") + target_link_options(${test_name} PRIVATE "--offload-arch=gfx942") + endif() endif() - target_link_libraries(${test_name} PRIVATE x3d2) target_link_libraries(${test_name} PRIVATE x3d2_backends) + target_link_libraries(${test_name} PRIVATE x3d2) if(WITH_2DECOMPFFT) find_package(decomp2d REQUIRED) @@ -78,6 +87,12 @@ endif() define_test(test_setget_field.f90 1 omp) +if(${OMP_TGT}) + define_test(test_vecadd.f90 1 omp_tgt) + define_test(test_time_integrator.f90 1 omp_tgt) + define_test(test_setget_field.f90 1 omp_tgt) +endif() + if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR ${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") define_test(test_reordering.f90 1 cuda) diff --git a/tests/omptgt/test_omptgt_allocator.f90 b/tests/omptgt/test_omptgt_allocator.f90 new file mode 100644 index 000000000..0beab10a4 --- /dev/null +++ b/tests/omptgt/test_omptgt_allocator.f90 @@ -0,0 +1,95 @@ +program test_allocator_omptgt + use iso_fortran_env, only: stderr => error_unit + + use m_allocator, only: allocator_t, field_t + use m_common, only: dp, pi, DIR_X + use m_mesh, only: mesh_t + + use m_omptgt_allocator, only: omptgt_allocator_t + + implicit none + + logical :: allpass + integer, dimension(3) :: dims, nproc_dir + real(dp) :: L_global(3) + character(len=20) :: BC_x(2), BC_y(2), BC_z(2) + class(allocator_t), allocatable 
:: allocator + class(mesh_t), allocatable :: mesh + class(field_t), pointer :: ptr1, ptr2, ptr3 + integer, allocatable :: l(:) + integer :: ierr + + call MPI_Init(ierr) + + dims = [8, 8, 8] + nproc_dir = [1, 1, 1] + L_global = [2*pi, 2*pi, 2*pi] + + BC_x = ['periodic', 'periodic'] + BC_y = ['periodic', 'periodic'] + BC_z = ['periodic', 'periodic'] + + mesh = mesh_t(dims, nproc_dir, L_global, BC_x, BC_y, BC_z) + + allocator = omptgt_allocator_t(mesh, 8) + + allpass = .true. + + ! Get the list of ids for free blocks. By default there are none + ! and returned list is [0]. + l = allocator%get_block_ids() + if (size(l) /= 1 .or. l(1) /= 0) then + allpass = .false. + write (stderr, '(a)') 'Free list is initialised empty... failed' + else + write (stderr, '(a)') 'Free list is initialised empty... passed' + end if + + ! Request two blocks and release them in reverse order. List should + ! contain two free blocks. (1 -> 2) + ptr1 => allocator%get_block(DIR_X) + ptr2 => allocator%get_block(DIR_X) + call allocator%release_block(ptr2) + call allocator%release_block(ptr1) + + if (.not. all(allocator%get_block_ids() == [1, 2])) then + allpass = .false. + write (stderr, '(a)') 'Blocks are released correctly... failed' + else + write (stderr, '(a)') 'Blocks are released correctly... passed' + end if + + !! Destroy the free list and check that the list is empty again. + call allocator%destroy() + l = allocator%get_block_ids() + if (size(l) /= 1 .or. l(1) /= 0 .or. allocator%next_id /= 0) then + allpass = .false. + write (stderr, '(a)') 'Free list is correctly destroyed... failed' + else + write (stderr, '(a)') 'Free list is correctly destroyed... passed' + end if + + ! Request a block from a list of three. This should grab the first + ! block on top of the pile and reduce the free list to two blocks. 
+ ptr1 => allocator%get_block(DIR_X) + ptr2 => allocator%get_block(DIR_X) + ptr3 => allocator%get_block(DIR_X) + call allocator%release_block(ptr3) + call allocator%release_block(ptr2) + call allocator%release_block(ptr1) + ptr1 => allocator%get_block(DIR_X) + + if (.not. all(allocator%get_block_ids() == [2, 3])) then + allpass = .false. + write (stderr, '(a)') 'Block is correctly allocated... failed' + else + write (stderr, '(a)') 'Block is correctly allocated... passed' + end if + + ! TODO: Check that data is resident on device + ! TODO: Check that data is freed from device + + call allocator%destroy() + + call MPI_Finalize(ierr) +end program test_allocator_omptgt diff --git a/tests/test_setget_field.f90 b/tests/test_setget_field.f90 index c8ccdc599..3e6af9ab3 100644 --- a/tests/test_setget_field.f90 +++ b/tests/test_setget_field.f90 @@ -10,7 +10,12 @@ program test_setget_field use m_cuda_backend, only: cuda_backend_t use m_cuda_common, only: SZ #else +#ifndef OMP_TGT use m_omp_backend, only: omp_backend_t +#else + use m_omptgt_backend, only: omptgt_backend_t + use m_omptgt_allocator, only: omptgt_allocator_t +#endif use m_omp_common, only: SZ #endif use m_mesh, only: mesh_t @@ -23,8 +28,13 @@ program test_setget_field type(cuda_allocator_t), target :: cuda_allocator type(cuda_backend_t), target :: cuda_backend #else +#ifndef OMP_TGT type(allocator_t), target :: omp_allocator type(omp_backend_t), target :: omp_backend +#else + type(omptgt_allocator_t), target :: omptgt_allocator + type(omptgt_backend_t), target :: omptgt_backend +#endif #endif type(mesh_t) :: mesh @@ -32,41 +42,75 @@ program test_setget_field real(dp), dimension(:, :, :), allocatable :: arr integer, dimension(3) :: shape_c + integer :: irank integer :: ierr call MPI_Init(ierr) + call MPI_Comm_rank(MPI_COMM_WORLD, irank, ierr) + + print *, "Initialised MPI" mesh = mesh_t([16, 32, 48], [1, 1, 1], [1.0_dp, 1.0_dp, 1.0_dp], & ["periodic", "periodic"], & ["periodic", "periodic"], & ["periodic", 
"periodic"]) + print *, "Initialised mesh" + #ifdef CUDA cuda_allocator = cuda_allocator_t(mesh%get_dims(VERT), SZ) allocator => cuda_allocator #else +#ifndef OMP_TGT omp_allocator = allocator_t(mesh%get_dims(VERT), SZ) allocator => omp_allocator omp_backend = omp_backend_t(mesh, allocator) backend => omp_backend +#else + omptgt_allocator = omptgt_allocator_t(mesh%get_dims(VERT), SZ) + allocator => omptgt_allocator + + omptgt_backend = omptgt_backend_t(mesh, allocator) + backend => omptgt_backend +#endif #endif + print *, "Initialised backend" + fld => backend%allocator%get_block(DIR_X, VERT) fld_c => backend%allocator%get_block(DIR_C, VERT) shape_c = fld_c%get_shape() + print *, shape_c allocate (arr(shape_c(1), shape_c(2), shape_c(3))) arr = 1.0_dp + + print *, "Initialised data" + call backend%set_field_data(fld, arr) + print *, "Set field data" + if (fld%data_loc /= VERT) then error stop "Field location was changed by set_field_data" end if + arr = 0.0_dp + call backend%get_field_data(arr, fld) + if (any(arr /= 1.0_dp)) then + error stop "Getting/setting field data failed" + end if + + print *, "Get field data" + deallocate (arr) call backend%allocator%release_block(fld) call backend%allocator%release_block(fld_c) + call MPI_Barrier(MPI_COMM_WORLD, ierr) call MPI_Finalize(ierr) + if (irank == 0) then + print *, "PASS" + end if end program test_setget_field diff --git a/tests/test_time_integrator.f90 b/tests/test_time_integrator.f90 index 5f1ef4ec7..8937ce0db 100644 --- a/tests/test_time_integrator.f90 +++ b/tests/test_time_integrator.f90 @@ -15,8 +15,13 @@ program test_omp_adamsbashforth use m_cuda_backend, only: cuda_backend_t use m_cuda_common, only: SZ #else - use m_omp_backend, only: omp_backend_t use m_omp_common, only: SZ +#ifndef OMP_TGT + use m_omp_backend, only: omp_backend_t +#else + use m_omptgt_backend, only: omptgt_backend_t + use m_omptgt_allocator, only: omptgt_allocator_t +#endif #endif implicit none @@ -48,8 +53,13 @@ program 
test_omp_adamsbashforth type(cuda_allocator_t), target :: cuda_allocator integer :: ndevs, devnum #else - type(omp_backend_t), target :: omp_backend +#ifndef OMP_TGT type(allocator_t), target :: omp_allocator + type(omp_backend_t), target :: omp_backend +#else + type(omptgt_allocator_t), target :: omptgt_allocator + type(omptgt_backend_t), target :: omptgt_backend +#endif #endif class(time_intg_t), allocatable :: time_integrator @@ -89,12 +99,22 @@ program test_omp_adamsbashforth backend => cuda_backend if (nrank == 0) print *, 'CUDA backend instantiated' #else +#ifndef OMP_TGT omp_allocator = allocator_t(mesh%get_dims(VERT), SZ) allocator => omp_allocator +#else + omptgt_allocator = omptgt_allocator_t(mesh%get_dims(VERT), SZ) + allocator => omptgt_allocator +#endif if (nrank == 0) print *, 'OpenMP allocator instantiated' +#ifndef OMP_TGT omp_backend = omp_backend_t(mesh, allocator) backend => omp_backend +#else + omptgt_backend = omptgt_backend_t(mesh, allocator) + backend => omptgt_backend +#endif if (nrank == 0) print *, 'OpenMP backend instantiated' #endif diff --git a/tests/test_vecadd.f90 b/tests/test_vecadd.f90 index df9f336ad..a2a094b67 100644 --- a/tests/test_vecadd.f90 +++ b/tests/test_vecadd.f90 @@ -16,7 +16,12 @@ program test_vecadd use m_cuda_backend, only: cuda_backend_t #else use m_omp_common, only: SZ +#ifndef OMP_TGT use m_omp_backend, only: omp_backend_t +#else + use m_omptgt_backend, only: omptgt_backend_t + use m_omptgt_allocator, only: omptgt_allocator_t +#endif #endif implicit none @@ -35,7 +40,12 @@ program test_vecadd #else type(allocator_t), target :: omp_allocator type(allocator_t), pointer :: host_allocator +#ifndef OMP_TGT type(omp_backend_t), target :: omp_backend +#else + type(omptgt_allocator_t), target :: omptgt_allocator + type(omptgt_backend_t), target :: omptgt_backend +#endif #endif class(field_t), pointer :: a => null() class(field_t), pointer :: b => null() @@ -63,21 +73,7 @@ program test_vecadd mesh = mesh_t(dims_global, 
nproc_dir, L_global, BC_x, BC_y, BC_z) -#ifdef CUDA - cuda_allocator = cuda_allocator_t(mesh%get_dims(VERT), SZ) - allocator => cuda_allocator - host_allocator = allocator_t(mesh%get_dims(VERT), SZ) - - cuda_backend = cuda_backend_t(mesh, allocator) - backend => cuda_backend -#else - omp_allocator = allocator_t(mesh%get_dims(VERT), SZ) - allocator => omp_allocator - host_allocator => omp_allocator - - omp_backend = omp_backend_t(mesh, allocator) - backend => omp_backend -#endif + call initialise_test() test_pass = .true. @@ -234,6 +230,32 @@ subroutine test_addab(d) end subroutine + subroutine initialise_test() + +#ifdef CUDA + cuda_allocator = cuda_allocator_t(mesh%get_dims(VERT), SZ) + allocator => cuda_allocator + host_allocator = allocator_t(mesh%get_dims(VERT), SZ) + + cuda_backend = cuda_backend_t(mesh, allocator) + backend => cuda_backend +#else + omp_allocator = allocator_t(mesh%get_dims(VERT), SZ) +#ifdef OMP_TGT + omptgt_allocator = omptgt_allocator_t(mesh%get_dims(VERT), SZ) + allocator => omptgt_allocator + omptgt_backend = omptgt_backend_t(mesh, allocator) + backend => omptgt_backend +#else + allocator => omp_allocator + omp_backend = omp_backend_t(mesh, allocator) + backend => omp_backend +#endif + host_allocator => omp_allocator +#endif + + end subroutine + subroutine initialise_data(a, b, c, z, d) integer, intent(in) :: d