diff --git a/content/examples/stencil/python-cupy/core.py b/content/examples/stencil/python-cupy/core.py new file mode 100644 index 0000000..06cf540 --- /dev/null +++ b/content/examples/stencil/python-cupy/core.py @@ -0,0 +1,51 @@ +# (c) 2023 ENCCS, CSC and the contributors +from heat import * + +# core_cupy.py + +import cupy as cp +import math + +# CuPy RawKernel for the stencil update +_evolve_kernel_code = """ +extern "C" __global__ +void evolve_kernel( + double* curr, const double* prev, float a, float dt, float dx2, float dy2, int nx, int ny +) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + int j = blockIdx.y * blockDim.y + threadIdx.y; + + if (i >= 1 && i < nx - 1 && j >= 1 && j < ny - 1) { + int idx = i * ny + j; + int idx_ip = (i + 1) * ny + j; + int idx_im = (i - 1) * ny + j; + int idx_jp = i * ny + (j + 1); + int idx_jm = i * ny + (j - 1); + + curr[idx] = prev[idx] + a * dt * ( + (prev[idx_ip] - 2.0f * prev[idx] + prev[idx_im]) / dx2 + + (prev[idx_jp] - 2.0f * prev[idx] + prev[idx_jm]) / dy2 + ); + } +} +""" + +_evolve_kernel = cp.RawKernel(_evolve_kernel_code, 'evolve_kernel') + +# Update the temperature values using five-point stencil +# Arguments: +# curr: current temperature field object +# prev: temperature field from previous time step +# a: diffusivity +# dt: time step +def evolve(current, previous, a, dt): + dx2, dy2 = previous.dx**2, previous.dy**2 + curr, prev = current.dev, previous.dev + + # Set thread and block sizes + nx, ny = prev.shape # These are the FULL dims, rows+2 / cols+2 + tx, ty = (16, 16) # Arbitrary choice + bx, by = math.ceil(nx / tx), math.ceil(ny / ty) + + # Run CuPy compiled kernel + _evolve_kernel((bx, by), (tx, ty), (curr, prev, cp.float32(a), cp.float32(dt), cp.float32(dx2), cp.float32(dy2), cp.int32(nx), cp.int32(ny))) diff --git a/content/examples/stencil/python-cupy/heat.py b/content/examples/stencil/python-cupy/heat.py new file mode 100644 index 0000000..5c93125 --- /dev/null +++ b/content/examples/stencil/python-cupy/heat.py @@ -0,0 +1,79 @@ +# (c) 2023 ENCCS, CSC and the contributors + +# heat.py + +# Fixed grid spacing +DX = 0.01 +DY = 0.01 +# Default temperatures +T_DISC = 5.0 +T_AREA = 65.0 +T_UPPER = 85.0 +T_LOWER = 5.0 +T_LEFT = 20.0 +T_RIGHT = 70.0 +# Default problem size +ROWS = 2000 +COLS = 2000 +NSTEPS = 500 + + +import copy +import numpy as np + + +class Field: + def __init__(self, rows, cols): + self.data = np.zeros((rows+2, cols+2), dtype=float) + self.dev = None # Device array + self.nx, self.ny = rows, cols + self.dx, self.dy = DX, DY + + +# setup.py + +def initialize(args): + rows, cols, nsteps = args.rows, args.cols, args.nsteps + current, previous = field_create(rows, cols) + return current, previous, nsteps + + +def field_create (rows, cols): + heat1 = field_generate(rows, cols) + heat2 = copy.deepcopy(heat1) + return heat1, heat2 + + +def field_generate(rows, cols): + heat = Field(rows, cols) + data, nx, ny = heat.data, heat.nx, heat.ny + _generate(data, nx, ny) + return heat + + +def field_average(heat): + return np.mean(heat.data[1:-1, 1:-1]) + + +def _generate(data, nx, ny): + # Radius of the source disc + radius = nx / 6.0 + for i in range(nx+2): + for j in range(ny+2): + # Distance of point i, j from the origin + dx = i - nx / 2 + 1 + dy = j - ny / 2 + 1 + if (dx * dx + dy * dy < radius * radius): + data[i,j] = T_DISC + else: + data[i,j] = T_AREA + + # Boundary conditions + for i in range(nx+2): + data[i,0] = T_LEFT + data[i, ny+1] = T_RIGHT + + for j in range(ny+2): + data[0,j] = T_UPPER + data[nx+1, j] = T_LOWER + diff --git a/content/examples/stencil/python-cupy/main.py b/content/examples/stencil/python-cupy/main.py new file mode 100644 index 0000000..c410e61 --- /dev/null +++ b/content/examples/stencil/python-cupy/main.py @@ -0,0 +1,100 @@ +# Main routine for heat equation solver in 2D. +# (c) 2023 ENCCS, CSC and the contributors +from core import * + +# io. py + +import time +import argparse + +import matplotlib +matplotlib.use('Agg') +import matplotlib. pyplot as plt +plt.rcParams['image.cmap'] = 'BrBG' + + +def field_write(heat, iter): + plt.gca().clear() + plt.imshow(heat.data) + plt.axis('off') + plt.savefig('heat_{0:03d}.png'.format(iter)) + + +# main_cupy.py + +def start_time (): return time.perf_counter() +def stop_time (): return time.perf_counter() + + +def main(args): + # Set up the solver + current, previous, nsteps = initialize(args) + + # Output the initial field and its temperature + field_write(current, 0) + average_temp = field_average(current) + print("Average temperature, start: %f" % average_temp) + + # Set diffusivity constant + a = 0.5 + # Compute the largest stable time step + dx2 = current.dx * current.dx + dy2 = current.dy * current. dy + dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)) + # Set output interval + output_interval = 1500 + + # Start timer + start_clock = start_time() + + # NOTE: Allocate and transfer device buffers, add them to the field object + import cupy as cp + current.dev = cp.asarray(current.data) + previous.dev = cp.asarray(previous.data) + + # Time evolution + for iter in range(1,nsteps+1): + evolve(current, previous, a, dt) + if (iter % output_interval == 0): + # Update data on host for output + current.data = cp.asnumpy(current.dev) + field_write(current, iter) + # Swap current and previous fields for next iteration step + current, previous = previous, current + # Copy data back to host + previous.data = cp.asnumpy(previous.dev) + # Stop timer + stop_clock = stop_time() + + # Output the final field and its temperature + average_temp = field_average(previous) + print("Average temperature at end: %f" % average_temp) + # Compare temperature for reference + if (args.rows == ROWS and args.cols == COLS and args.nsteps == NSTEPS): + print("Control temperature at end: 59.281239") + field_write(previous, nsteps) + + # Determine the computation time used for all the iterations + print("Iterations took %.3f seconds." % (stop_clock - start_clock)) + + +if __name__ == '__main__': + # Process command line arguments + parser = argparse.ArgumentParser(description='Heat flow example') + parser.add_argument('rows', type=int, nargs='?', default=ROWS, + help='number of grid rows') + parser.add_argument('cols', type=int, nargs='?', default=COLS, + help='number of grid cols') + parser.add_argument('nsteps', type=int, nargs='?', default=NSTEPS, + help='number of time steps') + + args = parser.parse_args() + + ### NOTE FOR COLAB / JUPYTER NOTEBOOK USERS: + # * Use lines below to change default arguments + # * First-time runs are routinely slower; run main cell twice to get timings + # * Google machine runs out of system RAM around 15000x15000 grid size + #args = parser.parse_args(args=[]) + #args.rows, args.cols, args.nsteps = 2000, 2000, 500 + + main(args) diff --git a/content/examples/stencil/python-jax/core.py b/content/examples/stencil/python-jax/core.py new file mode 100644 index 0000000..404e16e --- /dev/null +++ b/content/examples/stencil/python-jax/core.py @@ -0,0 +1,46 @@ +# (c) 2023 ENCCS, CSC and the contributors +from heat import * + +# core.py + +import jax +import jax.numpy as jnp +from functools import partial + + +# Update the temperature values using five-point stencil +# Arguments: +# curr: current temperature field object +# prev: temperature field from previous time step +# a: diffusivity +# dt: time step +def evolve(current, previous, a, dt): + dx2, dy2 = previous.dx**2, previous.dy**2 + curr, prev = current. data, previous.data + # Run JAX-accelerated update + result = _evolve(curr, prev, a, dt, dx2, dy2) + current.data = np.array(result) + + +@partial(jax.jit, static_argnums=()) +def _evolve(curr, prev, a, dt, dx2, dy2): + curr = jnp.asarray(curr, dtype=float) + prev = jnp.asarray(prev, dtype=float) + nx, ny = prev.shape # These are the FULL dims, rows+2 / cols+2 + + # Create interior slice + i_indices = jnp.arange(1, nx-1) + j_indices = jnp.arange(1, ny-1) + + # Vectorized stencil operation + def update_interior(): + laplacian_x = (prev[2:nx, 1:ny-1] - 2*prev[1:nx-1, 1:ny-1] + prev[0:nx-2, 1:ny-1]) / dx2 + laplacian_y = (prev[1:nx-1, 2:ny] - 2*prev[1:nx-1, 1:ny-1] + prev[1:nx-1, 0:ny-2]) / dy2 + interior_update = prev[1:nx-1, 1:ny-1] + a * dt * (laplacian_x + laplacian_y) + return interior_update + + interior = update_interior() + + # Reconstruct full array with boundaries + curr_new = curr.at[1:nx-1, 1:ny-1].set(interior) + return curr_new \ No newline at end of file diff --git a/content/examples/stencil/python-jax/heat.py b/content/examples/stencil/python-jax/heat.py new file mode 100644 index 0000000..3d8ef3a --- /dev/null +++ b/content/examples/stencil/python-jax/heat.py @@ -0,0 +1,86 @@ +# (c) 2023 ENCCS, CSC and the contributors + +# heat.py + +# Fixed grid spacing +DX = 0.01 +DY = 0.01 +# Default temperatures +T_DISC = 5.0 +T_AREA = 65.0 +T_UPPER = 85.0 +T_LOWER = 5.0 +T_LEFT = 20.0 +T_RIGHT = 70.0 +# Default problem size +ROWS = 2000 +COLS = 2000 +NSTEPS = 500 + + +import copy +import numpy as np +import jax +import jax.numpy as jnp +from functools import partial + +class Field: + def __init__(self, rows, cols): + self.data = jnp.zeros((rows+2, cols+2), dtype=float) + self.nx, self.ny = rows, cols + self.dx, self.dy = DX, DY + + +# setup. py + +def initialize(args): + rows, cols, nsteps = args.rows, args.cols, args.nsteps + current, previous = field_create(rows, cols) + return current, previous, nsteps + + +def field_create (rows, cols): + heat1 = field_generate(rows, cols) + heat2 = copy.deepcopy(heat1) + return heat1, heat2 + + +def field_generate(rows, cols): + heat = Field(rows, cols) + data, nx, ny = heat.data, heat.nx, heat.ny + heat.data = _generate(data, nx, ny) + return heat + + +def field_average(heat): + return float(jnp.mean(heat.data[1:-1, 1:-1])) + + +def _generate(data, nx, ny): + # Radius of the source disc + radius = nx / 6.0 + + # Create index arrays + i_indices = jnp.arange(nx+2) + j_indices = jnp.arange(ny+2) + i_grid, j_grid = jnp.meshgrid(i_indices, j_indices, indexing='ij') + + # Distance from origin + dx = i_grid - nx / 2 + 1 + dy = j_grid - ny / 2 + 1 + distance_sq = dx * dx + dy * dy + + # Initialize with T_AREA, then set disc region + result = jnp.full((nx+2, ny+2), T_AREA, dtype=float) + result = jnp.where(distance_sq < radius * radius, T_DISC, result) + + # Boundary conditions + # Left and right boundaries + result = result.at[:, 0].set(T_LEFT) + result = result. at[:, ny+1].set(T_RIGHT) + + # Top and bottom boundaries + result = result. at[0, :].set(T_UPPER) + result = result.at[nx+1, :].set(T_LOWER) + + return result diff --git a/content/examples/stencil/python/main.py b/content/examples/stencil/python-jax/main.py similarity index 99% rename from content/examples/stencil/python/main.py rename to content/examples/stencil/python-jax/main.py index 53fddf3..4bd40bd 100644 --- a/content/examples/stencil/python/main.py +++ b/content/examples/stencil/python-jax/main.py @@ -88,4 +88,4 @@ def main(args): #args.rows, args.cols, args.nsteps = 2000, 2000, 500 main(args) - + diff --git a/content/examples/stencil/python/core.py b/content/examples/stencil/python-numba/core.py similarity index 100% rename from content/examples/stencil/python/core.py rename to content/examples/stencil/python-numba/core.py diff --git a/content/examples/stencil/python/core_cuda.py b/content/examples/stencil/python-numba/core_cuda.py similarity index 100% rename from content/examples/stencil/python/core_cuda.py rename to content/examples/stencil/python-numba/core_cuda.py diff --git a/content/examples/stencil/python/heat.py b/content/examples/stencil/python-numba/heat.py similarity index 100% rename from content/examples/stencil/python/heat.py rename to content/examples/stencil/python-numba/heat.py diff --git a/content/examples/stencil/python-numba/main.py b/content/examples/stencil/python-numba/main.py new file mode 100644 index 0000000..5de332a --- /dev/null +++ b/content/examples/stencil/python-numba/main.py @@ -0,0 +1,101 @@ +# Main routine for heat equation solver in 2D. +# (c) 2023 ENCCS, CSC and the contributors + +# For AMD +try: + from numba import hip +except ImportError: + print("No numba-hip extension. Trying to use CUDA") +else: + print("Using numba-hip. Trying to use AMD GPU") + hip.pose_as_cuda() + +from core import * + +# io.py + +import time +import argparse + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +plt.rcParams['image.cmap'] = 'BrBG' + + +def field_write(heat, iter): + plt.gca().clear() + plt.imshow(heat.data) + plt.axis('off') + plt.savefig('heat_{0:03d}.png'.format(iter)) + + +# main.py + +def start_time (): return time.perf_counter() +def stop_time (): return time.perf_counter() + + +def main(args): + # Set up the solver + current, previous, nsteps = initialize(args) + + # Output the initial field and its temperature + field_write(current, 0) + average_temp = field_average(current) + print("Average temperature, start: %f" % average_temp) + + # Set diffusivity constant + a = 0.5 + # Compute the largest stable time step + dx2 = current.dx * current.dx + dy2 = current.dy * current.dy + dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2)) + # Set output interval + output_interval = 1500 + + # Start timer + start_clock = start_time() + # Time evolution + for iter in range(1,nsteps+1): + evolve(current, previous, a, dt) + if (iter % output_interval == 0): + field_write(current, iter) + # Swap current and previous fields for next iteration step + current, previous = previous, current + # Stop timer + stop_clock = stop_time() + + # Output the final field and its temperature + average_temp = field_average(previous) + print("Average temperature at end: %f" % average_temp) + # Compare temperature for reference + if (args.rows == ROWS and args.cols == COLS and args.nsteps == NSTEPS): + print("Control temperature at end: 59.281239") + field_write(previous, nsteps) + + # Determine the computation time used for all the iterations + print("Iterations took %.3f seconds." % (stop_clock - start_clock)) + + +if __name__ == '__main__': + # Process command line arguments + parser = argparse.ArgumentParser(description='Heat flow example') + parser.add_argument('rows', type=int, nargs='?', default=ROWS, + help='number of grid rows') + parser.add_argument('cols', type=int, nargs='?', default=COLS, + help='number of grid cols') + parser.add_argument('nsteps', type=int, nargs='?', default=NSTEPS, + help='number of time steps') + + args = parser.parse_args() + + ### NOTE FOR COLAB / JUPYTER NOTEBOOK USERS: + # * Use lines below to change default arguments + # * First-time runs are routinely slower; run main cell twice to get timings + # * Google machine runs out of system RAM around 15000x15000 grid size + #args = parser.parse_args(args=[]) + #args.rows, args.cols, args.nsteps = 2000, 2000, 500 + + main(args) + diff --git a/content/examples/stencil/python/main_cuda.py b/content/examples/stencil/python-numba/main_cuda.py similarity index 94% rename from content/examples/stencil/python/main_cuda.py rename to content/examples/stencil/python-numba/main_cuda.py index c8ad9be..cfa854d 100644 --- a/content/examples/stencil/python/main_cuda.py +++ b/content/examples/stencil/python-numba/main_cuda.py @@ -1,5 +1,14 @@ # Main routine for heat equation solver in 2D. # (c) 2023 ENCCS, CSC and the contributors +# For AMD +try: + from numba import hip +except ImportError: + print("No numba-hip extension. Trying to use CUDA") +else: + print("Using numba-hip. Trying to use AMD GPU") + hip.pose_as_cuda() + from core_cuda import * # io.py