ENCCS · weilinscenccs · Nov 26, 2025 · Nov 25, 2025 · Nov 26, 2025 · Nov 26, 2025
diff --git a/content/examples/stencil/python-cupy/core.py b/content/examples/stencil/python-cupy/core.py
@@ -0,0 +1,51 @@
+# (c) 2023 ENCCS, CSC and the contributors
+from heat import *
+
+# core_cupy.py
+
+import cupy as cp
+import math
+
+# CuPy RawKernel for the stencil update
+_evolve_kernel_code = """
+extern "C" __global__
+void evolve_kernel(
+    double* curr, const double* prev, float a, float dt, float dx2, float dy2, int nx, int ny
+) {
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+    int j = blockIdx.y * blockDim.y + threadIdx.y;
+
+    if (i >= 1 && i < nx - 1 && j >= 1 && j < ny - 1) {
+        int idx = i * ny + j;
+        int idx_ip = (i + 1) * ny + j;
+        int idx_im = (i - 1) * ny + j;
+        int idx_jp = i * ny + (j + 1);
+        int idx_jm = i * ny + (j - 1);
+
+        curr[idx] = prev[idx] + a * dt * (
+            (prev[idx_ip] - 2.0f * prev[idx] + prev[idx_im]) / dx2 +
+            (prev[idx_jp] - 2.0f * prev[idx] + prev[idx_jm]) / dy2
+        );
+    }
+}
+"""
+
+_evolve_kernel = cp.RawKernel(_evolve_kernel_code, 'evolve_kernel')
+
+# Update the temperature values using five-point stencil
+# Arguments:
+#   curr: current temperature field object
+#   prev: temperature field from previous time step
+#   a: diffusivity
+#   dt: time step
+def evolve(current, previous, a, dt):
+    dx2, dy2 = previous.dx**2, previous.dy**2
+    curr, prev = current.dev, previous.dev
+
+    # Set thread and block sizes
+    nx, ny = prev.shape  # These are the FULL dims, rows+2 / cols+2
+    tx, ty = (16, 16)    # Arbitrary choice
+    bx, by = math.ceil(nx / tx), math.ceil(ny / ty)
+
+    # Run CuPy compiled kernel
+    _evolve_kernel((bx, by), (tx, ty), (curr, prev, cp.float32(a), cp.float32(dt), cp.float32(dx2), cp.float32(dy2), cp.int32(nx), cp.int32(ny)))
diff --git a/content/examples/stencil/python-cupy/heat.py b/content/examples/stencil/python-cupy/heat.py
@@ -0,0 +1,79 @@
+# (c) 2023 ENCCS, CSC and the contributors
+
+# heat.py
+
+# Fixed grid spacing
+DX = 0.01
+DY = 0.01
+# Default temperatures
+T_DISC = 5.0
+T_AREA = 65.0
+T_UPPER = 85.0
+T_LOWER = 5.0
+T_LEFT = 20.0
+T_RIGHT = 70.0
+# Default problem size
+ROWS = 2000
+COLS = 2000
+NSTEPS = 500
+
+
+import copy
+import numpy as np
+
+
+class Field:
+    def __init__(self, rows, cols):
+        self.data = np.zeros((rows+2, cols+2), dtype=float)
+        self.dev = None  # Device array
+        self.nx, self.ny = rows, cols
+        self.dx, self.dy = DX, DY
+
+
+# setup.py
+
+def initialize(args):
+    rows, cols, nsteps = args.rows, args.cols, args.nsteps
+    current, previous = field_create(rows, cols)
+    return current, previous, nsteps
+
+
+def field_create (rows, cols):
+    heat1 = field_generate(rows, cols)
+    heat2 = copy.deepcopy(heat1)
+    return heat1, heat2
+
+
+def field_generate(rows, cols):
+    heat = Field(rows, cols)
+    data, nx, ny = heat.data, heat.nx, heat.ny
+    _generate(data, nx, ny)
+    return heat
+
+
+def field_average(heat):
+    return np.mean(heat.data[1:-1, 1:-1])
+
+
+def _generate(data, nx, ny):
+    # Radius of the source disc
+    radius = nx / 6.0
+    for i in range(nx+2):
+        for j in range(ny+2):
+            # Distance of point i, j from the origin
+            dx = i - nx / 2 + 1
+            dy = j - ny / 2 + 1
+            if (dx * dx + dy * dy < radius * radius):
+                data[i,j] = T_DISC
+            else:
+                data[i,j] = T_AREA
+
+    # Boundary conditions
+    for i in range(nx+2):
+        data[i,0] = T_LEFT
+        data[i, ny+1] = T_RIGHT
+
+    for j in range(ny+2):
+        data[0,j] = T_UPPER
+        data[nx+1, j] = T_LOWER
+
diff --git a/content/examples/stencil/python-cupy/main.py b/content/examples/stencil/python-cupy/main.py
@@ -0,0 +1,100 @@
+# Main routine for heat equation solver in 2D.
+# (c) 2023 ENCCS, CSC and the contributors
+from core import *
+
+# io. py
+
+import time
+import argparse
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib. pyplot as plt
+plt.rcParams['image.cmap'] = 'BrBG'
+
+
+def field_write(heat, iter):
+    plt.gca().clear()
+    plt.imshow(heat.data)
+    plt.axis('off')
+    plt.savefig('heat_{0:03d}.png'.format(iter))
+
+
+# main_cupy.py
+
+def start_time (): return time.perf_counter()
+def stop_time (): return time.perf_counter()
+
+
+def main(args):
+    # Set up the solver
+    current, previous, nsteps = initialize(args)
+
+    # Output the initial field and its temperature
+    field_write(current, 0)
+    average_temp = field_average(current)
+    print("Average temperature, start: %f" % average_temp)
+
+    # Set diffusivity constant
+    a = 0.5
+    # Compute the largest stable time step
+    dx2 = current.dx * current.dx
+    dy2 = current.dy * current. dy
+    dt = dx2 * dy2 / (2.0 * a * (dx2 + dy2))
+    # Set output interval
+    output_interval = 1500
+
+    # Start timer
+    start_clock = start_time()
+
+    # NOTE: Allocate and transfer device buffers, add them to the field object
+    import cupy as cp
+    current.dev = cp.asarray(current.data)
+    previous.dev = cp.asarray(previous.data)
+
+    # Time evolution
+    for iter in range(1,nsteps+1):
+        evolve(current, previous, a, dt)
+        if (iter % output_interval == 0):
+            # Update data on host for output
+            current.data = cp.asnumpy(current.dev)
+            field_write(current, iter)
+        # Swap current and previous fields for next iteration step
+        current, previous = previous, current
+    # Copy data back to host
+    previous.data = cp.asnumpy(previous.dev)
+    # Stop timer
+    stop_clock = stop_time()
+
+    # Output the final field and its temperature
+    average_temp = field_average(previous)
+    print("Average temperature at end: %f" % average_temp)
+    # Compare temperature for reference
+    if (args.rows == ROWS and args.cols == COLS and args.nsteps == NSTEPS):
+        print("Control temperature at end: 59.281239")
+    field_write(previous, nsteps)
+
+    # Determine the computation time used for all the iterations
+    print("Iterations took %.3f seconds." % (stop_clock - start_clock))
+
+
+if __name__ == '__main__':
+    # Process command line arguments
+    parser = argparse.ArgumentParser(description='Heat flow example')
+    parser.add_argument('rows', type=int, nargs='?', default=ROWS,
+                        help='number of grid rows')
+    parser.add_argument('cols', type=int, nargs='?', default=COLS,
+                        help='number of grid cols')
+    parser.add_argument('nsteps', type=int, nargs='?', default=NSTEPS,
+                        help='number of time steps')
+
+    args = parser.parse_args()
+
+    ### NOTE FOR COLAB / JUPYTER NOTEBOOK USERS:
+    #   * Use lines below to change default arguments
+    #   * First-time runs are routinely slower; run main cell twice to get timings
+    #   * Google machine runs out of system RAM around 15000x15000 grid size
+    #args = parser.parse_args(args=[])
+    #args.rows, args.cols, args.nsteps = 2000, 2000, 500
+
+    main(args)
diff --git a/content/examples/stencil/python-jax/core.py b/content/examples/stencil/python-jax/core.py
@@ -0,0 +1,46 @@
+# (c) 2023 ENCCS, CSC and the contributors
+from heat import *
+
+# core.py
+
+import jax
+import jax.numpy as jnp
+from functools import partial
+
+
+# Update the temperature values using five-point stencil
+# Arguments:
+#   curr: current temperature field object
+#   prev: temperature field from previous time step
+#   a: diffusivity
+#   dt: time step
+def evolve(current, previous, a, dt):
+    dx2, dy2 = previous.dx**2, previous.dy**2
+    curr, prev = current. data, previous.data
+    # Run JAX-accelerated update
+    result = _evolve(curr, prev, a, dt, dx2, dy2)
+    current.data = np.array(result)
+
+
+@partial(jax.jit, static_argnums=())
+def _evolve(curr, prev, a, dt, dx2, dy2):
+    curr = jnp.asarray(curr, dtype=float)
+    prev = jnp.asarray(prev, dtype=float)
+    nx, ny = prev.shape # These are the FULL dims, rows+2 / cols+2
+
+    # Create interior slice
+    i_indices = jnp.arange(1, nx-1)
+    j_indices = jnp.arange(1, ny-1)
+
+    # Vectorized stencil operation
+    def update_interior():
+        laplacian_x = (prev[2:nx, 1:ny-1] - 2*prev[1:nx-1, 1:ny-1] + prev[0:nx-2, 1:ny-1]) / dx2
+        laplacian_y = (prev[1:nx-1, 2:ny] - 2*prev[1:nx-1, 1:ny-1] + prev[1:nx-1, 0:ny-2]) / dy2
+        interior_update = prev[1:nx-1, 1:ny-1] + a * dt * (laplacian_x + laplacian_y)
+        return interior_update
+
+    interior = update_interior()
+
+    # Reconstruct full array with boundaries
+    curr_new = curr.at[1:nx-1, 1:ny-1].set(interior)
+    return curr_new
diff --git a/content/examples/stencil/python-jax/heat.py b/content/examples/stencil/python-jax/heat.py
@@ -0,0 +1,86 @@
+# (c) 2023 ENCCS, CSC and the contributors
+
+# heat.py
+
+# Fixed grid spacing
+DX = 0.01
+DY = 0.01
+# Default temperatures
+T_DISC = 5.0
+T_AREA = 65.0
+T_UPPER = 85.0
+T_LOWER = 5.0
+T_LEFT = 20.0
+T_RIGHT = 70.0
+# Default problem size
+ROWS = 2000
+COLS = 2000
+NSTEPS = 500
+
+
+import copy
+import numpy as np
+import jax
+import jax.numpy as jnp
+from functools import partial
+
+class Field:
+    def __init__(self, rows, cols):
+        self.data = jnp.zeros((rows+2, cols+2), dtype=float)
+        self.nx, self.ny = rows, cols
+        self.dx, self.dy = DX, DY
+
+
+# setup. py
+
+def initialize(args):
+    rows, cols, nsteps = args.rows, args.cols, args.nsteps
+    current, previous = field_create(rows, cols)
+    return current, previous, nsteps
+
+
+def field_create (rows, cols):
+    heat1 = field_generate(rows, cols)
+    heat2 = copy.deepcopy(heat1)
+    return heat1, heat2
+
+
+def field_generate(rows, cols):
+    heat = Field(rows, cols)
+    data, nx, ny = heat.data, heat.nx, heat.ny
+    heat.data = _generate(data, nx, ny)
+    return heat
+
+
+def field_average(heat):
+    return float(jnp.mean(heat.data[1:-1, 1:-1]))
+
+
+def _generate(data, nx, ny):
+    # Radius of the source disc
+    radius = nx / 6.0
+
+    # Create index arrays
+    i_indices = jnp.arange(nx+2)
+    j_indices = jnp.arange(ny+2)
+    i_grid, j_grid = jnp.meshgrid(i_indices, j_indices, indexing='ij')
+
+    # Distance from origin
+    dx = i_grid - nx / 2 + 1
+    dy = j_grid - ny / 2 + 1
+    distance_sq = dx * dx + dy * dy
+
+    # Initialize with T_AREA, then set disc region
+    result = jnp.full((nx+2, ny+2), T_AREA, dtype=float)
+    result = jnp.where(distance_sq < radius * radius, T_DISC, result)
+
+    # Boundary conditions
+    # Left and right boundaries
+    result = result.at[:, 0].set(T_LEFT)
+    result = result. at[:, ny+1].set(T_RIGHT)
+
+    # Top and bottom boundaries
+    result = result. at[0, :].set(T_UPPER)
+    result = result.at[nx+1, :].set(T_LOWER)
+
+    return result
diff --git a/content/examples/stencil/python/main.py → content/examples/stencil/python-jax/main.py b/content/examples/stencil/python/main.py → content/examples/stencil/python-jax/main.py
@@ -88,4 +88,4 @@ def main(args):
     #args.rows, args.cols, args.nsteps = 2000, 2000, 500    
 
     main(args)
-    
+
diff --git a/content/examples/stencil/python/core.py → ...ent/examples/stencil/python-numba/core.py b/content/examples/stencil/python/core.py → ...ent/examples/stencil/python-numba/core.py
diff --git a/content/examples/stencil/python/core_cuda.py → ...xamples/stencil/python-numba/core_cuda.py b/content/examples/stencil/python/core_cuda.py → ...xamples/stencil/python-numba/core_cuda.py
diff --git a/content/examples/stencil/python/heat.py → ...ent/examples/stencil/python-numba/heat.py b/content/examples/stencil/python/heat.py → ...ent/examples/stencil/python-numba/heat.py
Original file line number	Diff line number	Diff line change
Expand Up		@@ -88,4 +88,4 @@ def main(args):
		#args.rows, args.cols, args.nsteps = 2000, 2000, 500

		main(args)