Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d083df3
WIP: Implement a OpenMP target field type and allocator
pbartholomew08 Jun 3, 2025
532ab8d
Move OpenMP target offloads to omp/target directory
pbartholomew08 Jun 3, 2025
6035977
Optionally build OpenMP Target backend
pbartholomew08 Jun 3, 2025
88e6e42
Fix types in OpenMP target block allocator
pbartholomew08 Jun 3, 2025
72ef9ff
WIP on OMP target vecadd
pbartholomew08 Aug 5, 2025
b0b92e5
Correcting link order
pbartholomew08 Aug 5, 2025
f7f02f8
Cleaning up test_vecadd
pbartholomew08 Aug 6, 2025
72aa72c
The omp backend must assign its allocator based on class
pbartholomew08 Aug 6, 2025
a3c683d
Don't declare the method as a module function
pbartholomew08 Aug 6, 2025
4acdc65
Need to allocate the new field pointer
pbartholomew08 Aug 6, 2025
326fac0
Specify the target mapping operations when creating a field
pbartholomew08 Aug 6, 2025
1bad543
Initially 'working' OMP target vec add
pbartholomew08 Aug 6, 2025
9a9822b
Remove debugging print statement
pbartholomew08 Aug 7, 2025
b381fe7
We only need the 3-D view of data on the device
pbartholomew08 Aug 8, 2025
7cff7c6
Remove duplicate entry from CMakeLists sources
pbartholomew08 Aug 13, 2025
0278f9f
Mark index calculations as offloadable
pbartholomew08 Aug 13, 2025
3bcf141
Add support for get/set fields with OMP target
pbartholomew08 Aug 13, 2025
cc67db6
WIP - attempting simplified OMP calls
pbartholomew08 Aug 19, 2025
e6285b6
WIP allocating memory using OpenMP API
pbartholomew08 Aug 25, 2025
f07f8f0
Continuing ...
pbartholomew08 Aug 25, 2025
ce52a2a
Trying to map pointers to target...
pbartholomew08 Aug 25, 2025
304e289
Initial working version of OMPTARGET vec add
pbartholomew08 Sep 19, 2025
562bcdf
Restore IBM module
pbartholomew08 Sep 30, 2025
86a878a
Minor formatting change
pbartholomew08 Jan 9, 2026
45bc7e4
Adding support for OMP offload of timestepping
pbartholomew08 Dec 11, 2025
92b6ffe
Merge pull request #1 from pbartholomew08/omp_gpu-timestepping
pbartholomew08 Feb 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ else()
message(STATUS "ADIOS2 is disabled")
endif()

# Toggle for OpenMP target offloading; defaults to OFF and can be switched
# on at configure time with -DOMP_TGT=ON.
option(OMP_TGT "Enable OpenMP target offloading." OFF)

set(CMAKE_Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/include)
add_subdirectory(src)
add_subdirectory(tests)
16 changes: 14 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ set(SRC
solver.f90
tdsops.f90
time_integrator.f90
ordering.f90
mesh.f90
mesh_content.f90
field.f90
Expand Down Expand Up @@ -45,6 +44,9 @@ set(CUDASRC
set(BACKENDSRC
backend/omp/backend.f90
)
set(OMPTGTSRC # For OMP offloading
backend/omp/target/allocator.f90
)
set(2DECOMPFFTSRC
backend/omp/poisson_fft.f90
decomp/decomp_2decompfft.f90
Expand Down Expand Up @@ -80,6 +82,11 @@ if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
list(APPEND BACKENDSRC backend/cuda/backend.f90)
endif()

if(OMP_TGT)
list(APPEND SRC ${OMPTGTSRC})
list(APPEND BACKENDSRC backend/omp/target/backend.f90)
endif()

# Decide whether 2decomp&fft is supported by the build
find_package(decomp2d)

Expand All @@ -103,8 +110,8 @@ target_include_directories(x3d2_backends INTERFACE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(x3d2_backends PRIVATE x3d2)

add_executable(xcompact xcompact.f90)
target_link_libraries(xcompact PRIVATE x3d2)
target_link_libraries(xcompact PRIVATE x3d2_backends)
target_link_libraries(xcompact PRIVATE x3d2)

# if CUDA compiler
if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR
Expand All @@ -122,11 +129,16 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU" OR
set(CMAKE_Fortran_FLAGS "-cpp -std=f2018")
set(CMAKE_Fortran_FLAGS_DEBUG "-g -Og -Wall -Wpedantic -Werror -Wimplicit-interface -Wimplicit-procedure -Wno-unused-dummy-argument")
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math")
if(OMP_TGT)
  # GPU architecture used for offload code generation. Defaults to gfx942
  # (MI300A); override with -DOMP_TGT_ARCH=<arch> for other devices.
  set(OMP_TGT_ARCH "gfx942" CACHE STRING
      "GPU architecture passed to --offload-arch for OpenMP target offloading.")
  string(APPEND CMAKE_Fortran_FLAGS " -fopenmp --offload-arch=${OMP_TGT_ARCH}")
endif()
elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "Cray")
set(CMAKE_Fortran_FLAGS "-eF -M878") # -M878 suppresses WARNING multiple module includes (not useful)
set(CMAKE_Fortran_FLAGS_DEBUG "-G2 -O0")
set(CMAKE_Fortran_FLAGS_RELEASE "-O3")
target_link_options(x3d2 INTERFACE -h omp)
target_link_options(x3d2_backends INTERFACE -h omp)
endif()

if(WITH_2DECOMPFFT)
Expand Down
1 change: 1 addition & 0 deletions src/allocator.f90
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ function get_block(self, dir, data_loc) result(handle)

! Apply bounds remapping based on requested direction
call handle%set_shape(dims)

end function get_block

subroutine release_block(self, handle)
Expand Down
2 changes: 1 addition & 1 deletion src/backend/omp/backend.f90
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function init(mesh, allocator) result(backend)
call backend%base_init()

select type (allocator)
type is (allocator_t)
class is (allocator_t)
! class level access to the allocator
backend%allocator => allocator
end select
Expand Down
179 changes: 179 additions & 0 deletions src/backend/omp/target/allocator.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
!!! backend/omp/target/allocator.f90
!!
!! Implements an allocator specialised to OMP target offloading

module m_omptgt_allocator

use iso_c_binding, only: c_ptr, c_f_pointer, c_sizeof, c_null_ptr, c_associated
use omp_lib, only: omp_target_alloc, omp_target_free, omp_get_default_device

use m_common, only: dp

use m_allocator, only: allocator_t
use m_mesh, only: mesh_t
use m_field, only: field_t

implicit none

private
public :: omptgt_allocator_t
public :: omptgt_field_t

! Allocator specialised to OMP target offloading: the blocks it creates are
! omptgt_field_t objects whose storage is allocated on the device.
type, extends(allocator_t) :: omptgt_allocator_t
contains
  procedure :: create_block => create_block_omptgt
  ! Override the base destroy so that device memory held by the blocks is
  ! released before the base allocator tears down the block list.
  procedure :: destroy
end type omptgt_allocator_t

! Structure-constructor style interface: omptgt_allocator_t(dims, sz)
interface omptgt_allocator_t
  module procedure omptgt_allocator_init
end interface omptgt_allocator_t

type, extends(field_t) :: omptgt_field_t
  ! A device-resident field
  !
  ! dev_id     - OpenMP device number the storage was allocated on
  ! dev_ptr    - raw device address returned by omp_target_alloc; null until
  !              the field has been constructed and again after destroy
  ! p_data_tgt - flat (1-D) Fortran view of the device allocation
  ! data_tgt   - 3-D Fortran view of the device allocation, (re)associated
  !              by set_shape
  integer, private :: dev_id
  type(c_ptr), private :: dev_ptr = c_null_ptr
  real(dp), pointer, private :: p_data_tgt(:) => null()
  real(dp), pointer, contiguous :: data_tgt(:, :, :) => null()
contains
  procedure :: destroy => omptgt_field_destroy
  procedure :: fill => fill_omptgt
  procedure :: get_shape => get_shape_omptgt
  procedure :: set_shape => set_shape_omptgt
end type omptgt_field_t

! Structure-constructor style interface: omptgt_field_t(ngrid, next, id)
interface omptgt_field_t
  module procedure omptgt_field_init
end interface omptgt_field_t

contains

! Builds an OMP target offload allocator.
!
! Both arguments are forwarded unchanged to the base allocator_t
! constructor, which initialises the parent component of the result.
function omptgt_allocator_init(dims, sz) result(a)
  integer, intent(in) :: dims(3)
  integer, intent(in) :: sz
  type(omptgt_allocator_t) :: a

  a%allocator_t = allocator_t(dims, sz)
end function omptgt_allocator_init

! Creates a new device-resident block and returns it through a pointer of
! the base field class.
!
! self - allocator; its next_id counter is advanced by one
! next - block the new block's next pointer will refer to
function create_block_omptgt(self, next) result(ptr)
  class(omptgt_allocator_t), intent(inout) :: self
  class(field_t), pointer, intent(in) :: next
  class(field_t), pointer :: ptr

  type(omptgt_field_t), pointer :: blk

  ! The new block is labelled with the freshly incremented identifier.
  self%next_id = self%next_id + 1

  allocate (blk)
  blk = omptgt_field_t(self%ngrid, next, id=self%next_id)

  ptr => blk
end function create_block_omptgt

! Constructs a device-resident field.
!
! Allocates storage for ngrid reals of kind dp on the default OpenMP device
! and associates the flat pointer p_data_tgt with it. The 3-D view data_tgt
! is left unassociated here; it is set up by set_shape.
!
! ngrid - number of real(dp) elements to allocate on the device
! next  - field the new field's next pointer will refer to
! id    - identifier stored in the field
!
! Stops the program if the device allocation fails.
type(omptgt_field_t) function omptgt_field_init(ngrid, next, id) result(f)
  integer, intent(in) :: ngrid
  class(field_t), pointer, intent(in) :: next
  integer, intent(in) :: id

  f%refcount = 0
  f%next => next
  f%id = id

  f%dev_id = omp_get_default_device()
  f%dev_ptr = omp_target_alloc(ngrid * c_sizeof(0.0_dp), f%dev_id)

  ! omp_target_alloc signals failure with a null pointer; catch it here
  ! rather than letting a later kernel dereference an invalid address.
  if (ngrid > 0 .and. .not. c_associated(f%dev_ptr)) then
    error stop "omptgt_field_init: omp_target_alloc failed"
  end if

  call c_f_pointer(f%dev_ptr, f%p_data_tgt, shape=[ngrid])

end function omptgt_field_init

! Releases the device storage owned by a field.
!
! Both Fortran views are disassociated and the device allocation is freed.
! The device pointer is reset afterwards so that calling destroy a second
! time does not free the same allocation twice.
subroutine omptgt_field_destroy(self)
  class(omptgt_field_t) :: self

  nullify (self%data_tgt)
  nullify (self%p_data_tgt)

  if (c_associated(self%dev_ptr)) then
    call omp_target_free(self%dev_ptr, self%dev_id)
    self%dev_ptr = c_null_ptr
  end if
end subroutine omptgt_field_destroy

! Deallocates device-resident memory before deallocating the base type.
!
! Walks the block list starting at self%first and calls destroy on every
! block that is an omptgt_field_t (or a type extending it), then delegates
! to the base allocator's destroy.
subroutine destroy(self)
  class(omptgt_allocator_t) :: self

  class(field_t), pointer :: ptr

  ptr => self%first
  do while (associated(ptr))
    select type (ptr)
    class is (omptgt_field_t)
      ! class is (rather than type is) so that fields of extended types
      ! also release their device storage.
      call ptr%destroy()
    end select

    ptr => ptr%next
  end do

  call self%allocator_t%destroy()
end subroutine destroy

! Sets every element of the field's device data to the constant c.
!
! The 3-D view is filled when it is associated. If no shape has been set
! yet, the flat view of the allocation is filled instead, so that a
! disassociated pointer is never handed to the kernel wrapper. A field with
! neither view associated has no storage and is left untouched.
subroutine fill_omptgt(self, c)
  class(omptgt_field_t) :: self
  real(dp), intent(in) :: c

  if (associated(self%data_tgt)) then
    call fill_omptgt_3d_(self%data_tgt, c)
  else if (associated(self%p_data_tgt)) then
    call fill_omptgt_(self%p_data_tgt, c, size(self%p_data_tgt))
  end if

end subroutine fill_omptgt

! Offloaded fill of a flat array: assigns c to elements 1..n.
!
! p_data_tgt - array to fill; declared has_device_addr, i.e. it is treated
!              as already having a device address and is not mapped
! c          - value to store
! n          - number of leading elements to set
subroutine fill_omptgt_(p_data_tgt, c, n)
  real(dp), dimension(:), intent(inout) :: p_data_tgt
  real(dp), intent(in) :: c
  integer, intent(in) :: n

  integer :: idx

  !$omp target teams distribute parallel do has_device_addr(p_data_tgt)
  do idx = 1, n
    p_data_tgt(idx) = c
  end do
  !$omp end target teams distribute parallel do

end subroutine fill_omptgt_

! Offloaded fill of a rank-3 array: assigns c to every element.
!
! data_tgt - array to fill; declared has_device_addr, i.e. it is treated as
!            already having a device address and is not mapped
! c        - value to store
subroutine fill_omptgt_3d_(data_tgt, c)
  real(dp), dimension(:, :, :), intent(inout) :: data_tgt
  real(dp), intent(in) :: c

  integer :: n1, n2, n3
  integer :: i, j, k

  ! Keep the loop bounds in scalars: scalars referenced in a target region
  ! are firstprivate by default, whereas a bounds array would be implicitly
  ! mapped to and from the device on every launch.
  n1 = size(data_tgt, dim=1)
  n2 = size(data_tgt, dim=2)
  n3 = size(data_tgt, dim=3)

  !$omp target teams distribute parallel do collapse(3) has_device_addr(data_tgt)
  do k = 1, n3
    do j = 1, n2
      do i = 1, n1
        data_tgt(i, j, k) = c
      end do
    end do
  end do
  !$omp end target teams distribute parallel do
end subroutine fill_omptgt_3d_

! Returns the extents of the field's 3-D device view, one per dimension.
function get_shape_omptgt(self) result(dims)
  class(omptgt_field_t) :: self
  integer :: dims(3)

  integer :: axis

  do axis = 1, 3
    dims(axis) = size(self%data_tgt, dim=axis)
  end do
end function get_shape_omptgt

! (Re)associates the 3-D view data_tgt with the device allocation using the
! requested extents.
!
! dims - extents of the 3-D view
!
! Stops the program if the requested shape holds more elements than the
! flat view of the allocation, since the resulting view would reach past
! the end of the device buffer.
subroutine set_shape_omptgt(self, dims)
  class(omptgt_field_t) :: self
  integer, intent(in) :: dims(3)

  ! Wide integer kind so the element count cannot overflow.
  integer, parameter :: i8 = selected_int_kind(18)

  if (associated(self%p_data_tgt)) then
    if (product(int(dims, i8)) > size(self%p_data_tgt, kind=i8)) then
      error stop "set_shape_omptgt: requested shape exceeds device allocation"
    end if
  end if

  call c_f_pointer(self%dev_ptr, self%data_tgt, shape=dims)

end subroutine set_shape_omptgt

end module m_omptgt_allocator

Loading
Loading