From 791dfd39a4c645f0610aefed4b39c452dc839218 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 13 May 2022 17:29:05 -0500 Subject: [PATCH 01/59] Initial work towards using array contexts --- setup.py | 1 + sumpy/e2e.py | 14 +------ sumpy/e2p.py | 8 +--- sumpy/fmm.py | 96 +++++++++++++++++++++++------------------------- sumpy/p2e.py | 14 +++---- sumpy/p2p.py | 21 +++++------ sumpy/tools.py | 10 +---- test/test_fmm.py | 3 +- 8 files changed, 69 insertions(+), 98 deletions(-) diff --git a/setup.py b/setup.py index 19ca094fc..51c60fb2e 100644 --- a/setup.py +++ b/setup.py @@ -104,6 +104,7 @@ def write_git_revision(package_name): "arraycontext", "pytest>=2.3", "pyrsistent>=0.16.0", + "arraycontext", "dataclasses>=0.7;python_version<='3.6'", "sympy>=0.7.2", "pymbolic>=2021.1", diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 253f85592..b005f1ced 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -49,8 +49,8 @@ # {{{ translation base class class E2EBase(KernelCacheWrapper): - def __init__(self, ctx, src_expansion, tgt_expansion, - name=None, device=None): + def __init__(self, src_expansion, tgt_expansion, + name=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` :arg strength_usage: A list of integers indicating which expression @@ -59,9 +59,6 @@ def __init__(self, ctx, src_expansion, tgt_expansion, Default: all kernels use the same strength. 
""" - if device is None: - device = ctx.devices[0] - if src_expansion is tgt_expansion: from sumpy.kernel import (TargetTransformationRemover, SourceTransformationRemover) @@ -80,11 +77,9 @@ def __init__(self, ctx, src_expansion, tgt_expansion, SourceTransformationRemover()( TargetTransformationRemover()(tgt_expansion.kernel))) - self.ctx = ctx self.src_expansion = src_expansion self.tgt_expansion = tgt_expansion self.name = name or self.default_name - self.device = device if src_expansion.dim != tgt_expansion.dim: raise ValueError("source and target expansions must have " @@ -149,11 +144,6 @@ class E2EFromCSR(E2EBase): default_name = "e2e_from_csr" - def __init__(self, ctx, src_expansion, tgt_expansion, - name=None, device=None): - super().__init__(ctx, src_expansion, tgt_expansion, - name=name, device=device) - def get_translation_loopy_insns(self): from sumpy.symbolic import make_sym_vector dvec = make_sym_vector("d", self.dim) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 0f0e1d166..a229b8931 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -43,8 +43,7 @@ # {{{ E2P base class class E2PBase(KernelCacheWrapper): - def __init__(self, ctx, expansion, kernels, - name=None, device=None): + def __init__(self, expansion, kernels, name=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` :arg strength_usage: A list of integers indicating which expression @@ -53,9 +52,6 @@ def __init__(self, ctx, expansion, kernels, Default: all kernels use the same strength. 
""" - if device is None: - device = ctx.devices[0] - from sumpy.kernel import (SourceTransformationRemover, TargetTransformationRemover) sxr = SourceTransformationRemover() @@ -67,11 +63,9 @@ def __init__(self, ctx, expansion, kernels, for knl in kernels: assert txr(knl) == expansion.kernel - self.ctx = ctx self.expansion = expansion self.kernels = kernels self.name = name or self.default_name - self.device = device self.dim = expansion.dim diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 30ca38384..4bdc85f8f 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -27,12 +27,15 @@ """ +import numpy as np import pyopencl as cl import pyopencl.array # noqa from pytools import memoize_method from boxtree.fmm import TreeIndependentDataForWrangler, ExpansionWranglerInterface +from arraycontext import ArrayContext, Array + from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, E2PFromCSR, @@ -61,7 +64,7 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): profiling enabled. """ - def __init__(self, cl_context, + def __init__(self, setup_actx: ArrayContext, multipole_expansion_factory, local_expansion_factory, target_kernels, exclude_self=False, use_rscale=None, @@ -76,6 +79,8 @@ def __init__(self, cl_context, :arg strength_usage: passed unchanged to p2l, p2m and p2p. :arg source_kernels: passed unchanged to p2l, p2m and p2p. 
""" + self._setup_actx = setup_actx.clone() + self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory self.source_kernels = source_kernels @@ -86,8 +91,6 @@ def __init__(self, cl_context, super().__init__() - self.cl_context = cl_context - @memoize_method def get_base_kernel(self): from pytools import single_valued @@ -107,21 +110,21 @@ def m2l_translation(self): @memoize_method def p2m(self, tgt_order): - return P2EFromSingleBox(self.cl_context, + return P2EFromSingleBox( kernels=self.source_kernels, expansion=self.multipole_expansion(tgt_order), strength_usage=self.strength_usage) @memoize_method def p2l(self, tgt_order): - return P2EFromCSR(self.cl_context, + return P2EFromCSR( kernels=self.source_kernels, expansion=self.local_expansion(tgt_order), strength_usage=self.strength_usage) @memoize_method def m2m(self, src_order, tgt_order): - return E2EFromChildren(self.cl_context, + return E2EFromChildren( self.multipole_expansion(src_order), self.multipole_expansion(tgt_order)) @@ -132,49 +135,49 @@ def m2l(self, src_order, tgt_order, m2l_class = M2LUsingTranslationClassesDependentData else: m2l_class = E2EFromCSR - return m2l_class(self.cl_context, + return m2l_class( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2l_translation_class_dependent_data_kernel(self, src_order, tgt_order): - return M2LGenerateTranslationClassesDependentData(self.cl_context, + return M2LGenerateTranslationClassesDependentData( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2l_preprocess_mpole_kernel(self, src_order, tgt_order): - return M2LPreprocessMultipole(self.cl_context, + return M2LPreprocessMultipole( self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2l_postprocess_local_kernel(self, src_order, tgt_order): - return M2LPostprocessLocal(self.cl_context, + return M2LPostprocessLocal( 
self.multipole_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def l2l(self, src_order, tgt_order): - return E2EFromParent(self.cl_context, + return E2EFromParent( self.local_expansion(src_order), self.local_expansion(tgt_order)) @memoize_method def m2p(self, src_order): - return E2PFromCSR(self.cl_context, + return E2PFromCSR( self.multipole_expansion(src_order), self.target_kernels) @memoize_method def l2p(self, src_order): - return E2PFromSingleBox(self.cl_context, + return E2PFromSingleBox( self.local_expansion(src_order), self.target_kernels) @memoize_method def p2p(self): - return P2PFromCSR(self.cl_context, target_kernels=self.target_kernels, + return P2PFromCSR(target_kernels=self.target_kernels, source_kernels=self.source_kernels, exclude_self=self.exclude_self, strength_usage=self.strength_usage) @@ -386,31 +389,21 @@ def order_to_size(order): return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.m2l_translation_class_level_start_box_nrs()) - def multipole_expansion_zeros(self, template_ary): + def multipole_expansion_zeros(self, actx: ArrayContext) -> Array: """Return an expansions array (which must support addition) capable of holding one multipole or local expansion for every box in the tree. - :arg template_ary: an array (not necessarily of the same shape or dtype as - the one to be created) whose run-time environment - (e.g. :class:`pyopencl.CommandQueue`) the returned array should - reuse. """ - return cl.array.zeros( - template_ary.queue, + return actx.zeros( self.multipole_expansions_level_starts()[-1], dtype=self.dtype) - def local_expansion_zeros(self, template_ary): + def local_expansion_zeros(self, actx) -> Array: """Return an expansions array (which must support addition) capable of holding one multipole or local expansion for every box in the tree. - :arg template_ary: an array (not necessarily of the same shape or dtype as - the one to be created) whose run-time environment - (e.g. 
:class:`pyopencl.CommandQueue`) the returned array should - reuse. """ - return cl.array.zeros( - template_ary.queue, + return actx.zeros( self.local_expansions_level_starts()[-1], dtype=self.dtype) @@ -483,27 +476,22 @@ def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): m2l_work_array_level_starts = \ m2l_preproc_mpole_expansions_level_starts - def output_zeros(self, template_ary): + def output_zeros(self, actx: ArrayContext) -> np.ndarray: """Return a potentials array (which must support addition) capable of holding a potential value for each target in the tree. Note that :func:`drive_fmm` makes no assumptions about *potential* other than that it supports addition--it may consist of potentials, gradients of the potential, or arbitrary other per-target output data. - :arg template_ary: an array (not necessarily of the same shape or dtype as - the one to be created) whose run-time environment - (e.g. :class:`pyopencl.CommandQueue`) the returned array should - reuse. """ from pytools.obj_array import make_obj_array return make_obj_array([ - cl.array.zeros( - template_ary.queue, + actx.zeros( self.tree.ntargets, dtype=self.dtype) for k in self.tree_indep.target_kernels]) def reorder_sources(self, source_array): - return source_array.with_queue(source_array.queue)[self.tree.user_source_ids] + return source_array[self.tree.user_source_ids] def reorder_potentials(self, potentials): from pytools.obj_array import obj_array_vectorize @@ -559,16 +547,14 @@ def run_opencl_fft(self, queue, input_vec, inverse, wait_for): return run_opencl_fft(app, queue, input_vec, inverse, wait_for) def form_multipoles(self, + actx: ArrayContext, level_start_source_box_nrs, source_boxes, src_weight_vecs): - mpoles = self.multipole_expansion_zeros(src_weight_vecs[0]) + mpoles = self.multipole_expansion_zeros(actx) kwargs = self.extra_kwargs.copy() kwargs.update(self.box_source_list_kwargs()) - events = [] - queue = src_weight_vecs[0].queue - for lev in range(self.tree.nlevels): 
p2m = self.tree_indep.p2m(self.level_orders[lev]) start, stop = level_start_source_box_nrs[lev:lev+2] @@ -579,7 +565,7 @@ def form_multipoles(self, mpoles, lev) evt, (mpoles_res,) = p2m( - queue, + actx, source_boxes=source_boxes[start:stop], centers=self.tree.box_centers, strengths=src_weight_vecs, @@ -588,13 +574,13 @@ def form_multipoles(self, rscale=self.level_to_rscale(lev), **kwargs) - events.append(evt) assert mpoles_res is mpoles_view - return (mpoles, SumpyTimingFuture(queue, events)) + return mpoles def coarsen_multipoles(self, + actx: ArrayContext, level_start_source_parent_box_nrs, source_parent_boxes, mpoles): @@ -652,9 +638,11 @@ def coarsen_multipoles(self, return (mpoles, SumpyTimingFuture(queue, events)) - def eval_direct(self, target_boxes, source_box_starts, + def eval_direct(self, + actx: ArrayContext, + target_boxes, source_box_starts, source_box_lists, src_weight_vecs): - pot = self.output_zeros(src_weight_vecs[0]) + pot = self.output_zeros(actx) kwargs = self.extra_kwargs.copy() kwargs.update(self.self_extra_kwargs) @@ -758,12 +746,13 @@ def _add_m2l_precompute_kwargs(self, kwargs_for_m2l, self.translation_classes_data.from_sep_siblings_translation_classes def multipole_to_local(self, + actx: ArrayContext, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, mpole_exps): queue = mpole_exps.queue - local_exps = self.local_expansion_zeros(mpole_exps) + local_exps = self.local_expansion_zeros(actx) if self.tree_indep.m2l_translation.use_preprocessing: preprocessed_mpole_exps = \ @@ -900,8 +889,9 @@ def multipole_to_local(self, return (local_exps, SumpyTimingFuture(queue, timing_events)) def eval_multipoles(self, + actx: ArrayContext, target_boxes_by_source_level, source_boxes_by_level, mpole_exps): - pot = self.output_zeros(mpole_exps) + pot = self.output_zeros(actx) kwargs = self.kernel_extra_kwargs.copy() kwargs.update(self.box_target_list_kwargs()) @@ -951,9 +941,10 @@ def eval_multipoles(self, return (pot, 
SumpyTimingFuture(queue, events)) def form_locals(self, + actx: ArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, starts, lists, src_weight_vecs): - local_exps = self.local_expansion_zeros(src_weight_vecs[0]) + local_exps = self.local_expansion_zeros(actx) kwargs = self.extra_kwargs.copy() kwargs.update(self.box_source_list_kwargs()) @@ -993,6 +984,7 @@ def form_locals(self, return (local_exps, SumpyTimingFuture(queue, events)) def refine_locals(self, + actx: ArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): @@ -1038,8 +1030,10 @@ def refine_locals(self, return (local_exps, SumpyTimingFuture(queue, [evt])) - def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): - pot = self.output_zeros(local_exps) + def eval_locals(self, + actx: ArrayContext, + level_start_target_box_nrs, target_boxes, local_exps): + pot = self.output_zeros(actx) kwargs = self.kernel_extra_kwargs.copy() kwargs.update(self.box_target_list_kwargs()) @@ -1077,7 +1071,7 @@ def eval_locals(self, level_start_target_box_nrs, target_boxes, local_exps): return (pot, SumpyTimingFuture(queue, events)) - def finalize_potentials(self, potentials, template_ary): + def finalize_potentials(self, actx: ArrayContext, potentials): return potentials # }}} diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 177d0b586..3fd071b53 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -49,8 +49,8 @@ class P2EBase(KernelComputation, KernelCacheWrapper): .. automethod:: __init__ """ - def __init__(self, ctx, expansion, kernels=None, - name=None, device=None, strength_usage=None): + def __init__(self, expansion, kernels=None, + name=None, strength_usage=None): """ :arg expansion: a subclass of :class:`sumpy.expansion.ExpansionBase` :arg kernels: if not provided, the kernel of the *expansion* is used. 
@@ -78,10 +78,10 @@ def __init__(self, ctx, expansion, kernels=None, assert txr(knl) == knl assert sxr(knl) == expansion.kernel - KernelComputation.__init__(self, ctx=ctx, target_kernels=[], + KernelComputation.__init__(self, target_kernels=[], source_kernels=kernels, strength_usage=strength_usage, value_dtypes=None, - name=name, device=device) + name=name) self.expansion = expansion self.dim = expansion.dim @@ -135,11 +135,11 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): enforce_variable_access_ordered="no_check") return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): from sumpy.tools import is_obj_array_like sources = kwargs.pop("sources") centers = kwargs.pop("centers") - knl = self.get_cached_optimized_kernel( + knl = self.get_kernel( sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) @@ -148,7 +148,7 @@ def __call__(self, queue, **kwargs): dtype = centers[0].dtype if is_obj_array_like(centers) else centers.dtype rscale = dtype.type(kwargs.pop("rscale")) - return knl(queue, sources=sources, centers=centers, rscale=rscale, **kwargs) + return actx.call_loopy(knl, sources=sources, centers=centers, rscale=rscale, **kwargs) # }}} diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 83506a377..597ec337d 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -51,8 +51,8 @@ # {{{ p2p base class class P2PBase(KernelComputation, KernelCacheWrapper): - def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, - value_dtypes=None, name=None, device=None, source_kernels=None): + def __init__(self, target_kernels, exclude_self, strength_usage=None, + value_dtypes=None, name=None, source_kernels=None): """ :arg target_kernels: list of :class:`sumpy.kernel.Kernel` instances with only target derivatives. 
@@ -82,9 +82,9 @@ def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, base_target_kernel = single_valued(txr(knl) for knl in target_kernels) assert base_source_kernel == base_target_kernel - KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels, + KernelComputation.__init__(self, target_kernels=target_kernels, source_kernels=source_kernels, strength_usage=strength_usage, - value_dtypes=value_dtypes, name=name, device=device) + value_dtypes=value_dtypes, name=name) self.exclude_self = exclude_self @@ -94,8 +94,7 @@ def __init__(self, ctx, target_kernels, exclude_self, strength_usage=None, def get_cache_key(self): return (type(self).__name__, tuple(self.target_kernels), self.exclude_self, tuple(self.strength_usage), tuple(self.value_dtypes), - tuple(self.source_kernels), - self.device.hashable_model_and_version_identifier) + tuple(self.source_kernels)) def get_loopy_insns_and_result_names(self): from sumpy.symbolic import make_sym_vector @@ -638,10 +637,8 @@ def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box, return loopy_knl def get_optimized_kernel(self, max_nsources_in_one_box, - max_ntargets_in_one_box): - import pyopencl as cl - dev = self.context.devices[0] - if dev.type & cl.device_type.CPU: + max_ntargets_in_one_box, is_cpu): + if is_cpu: knl = self.get_kernel(max_nsources_in_one_box, max_ntargets_in_one_box, gpu=False) knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0") @@ -661,11 +658,13 @@ def get_optimized_kernel(self, max_nsources_in_one_box, return knl def __call__(self, queue, **kwargs): + import pyopencl as cl max_nsources_in_one_box = kwargs.pop("max_nsources_in_one_box") max_ntargets_in_one_box = kwargs.pop("max_ntargets_in_one_box") knl = self.get_cached_optimized_kernel( max_nsources_in_one_box=max_nsources_in_one_box, - max_ntargets_in_one_box=max_ntargets_in_one_box) + max_ntargets_in_one_box=max_ntargets_in_one_box, + is_cpu=queue.dev.type & cl.device_type.CPU) return knl(queue, 
**kwargs) diff --git a/sumpy/tools.py b/sumpy/tools.py index d3b61b8d1..67deb911c 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -579,8 +579,8 @@ class ScalingAssignmentTag(Tag): class KernelComputation: """Common input processing for kernel computations.""" - def __init__(self, ctx, target_kernels, source_kernels, strength_usage, - value_dtypes, name, device=None): + def __init__(self, target_kernels, source_kernels, strength_usage, + value_dtypes, name): """ :arg kernels: list of :class:`sumpy.kernel.Kernel` instances :class:`sumpy.kernel.TargetDerivative` wrappers should be @@ -618,12 +618,6 @@ def __init__(self, ctx, target_kernels, source_kernels, strength_usage, # }}} - if device is None: - device = ctx.devices[0] - - self.context = ctx - self.device = device - self.source_kernels = tuple(source_kernels) self.target_kernels = tuple(target_kernels) self.value_dtypes = value_dtypes diff --git a/test/test_fmm.py b/test/test_fmm.py index f447999cf..af85c5a3b 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -142,7 +142,6 @@ def _test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class, from boxtree import TreeBuilder tb = TreeBuilder(actx.context) - tree, _ = tb(actx.queue, sources, targets=targets, max_particles_in_box=30, debug=True) @@ -235,7 +234,7 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): from boxtree.fmm import drive_fmm - pot, = drive_fmm(wrangler, (weights,)) + pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P p2p = P2P(actx.context, target_kernels, exclude_self=False) From 2da9092777a5e12fa59a126adabce7e0560c038c Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 15:50:35 +0300 Subject: [PATCH 02/59] update tests to pass actx --- test/test_fmm.py | 111 +++++++++++++++++++---------------------- test/test_kernels.py | 52 +++++++++---------- test/test_matrixgen.py | 25 +++++----- test/test_misc.py | 4 +- test/test_qbx.py | 12 ++--- test/test_tools.py | 2 +- 6 files changed, 98 
insertions(+), 108 deletions(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index af85c5a3b..1825b92fe 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -125,12 +125,12 @@ def _test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class, dtype = np.float64 from boxtree.tools import make_normal_particle_array as p_normal - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, nsources, knl.dim, dtype, seed=15) if 1: offset = np.zeros(knl.dim) offset[0] = 0.1 - targets = offset + p_normal(actx.queue, ntargets, knl.dim, dtype, seed=18) + targets = offset + p_normal(actx, ntargets, knl.dim, dtype, seed=18) del offset else: @@ -141,19 +141,19 @@ def _test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class, targets = make_obj_array([fp.points[i] for i in range(knl.dim)]) from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) - tree, _ = tb(actx.queue, sources, targets=targets, + tb = TreeBuilder(actx) + tree, _ = tb(actx, sources, targets=targets, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) # {{{ plot tree if 0: - host_tree = tree.get(actx.queue) - host_trav = trav.get(actx.queue) + host_tree = actx.to_numpy(tree) + host_trav = actx.to_numpy(trav) if 0: logger.info("src_box: %s", host_tree.find_box_nr_for_source(403)) @@ -215,7 +215,7 @@ def _test_sumpy_fmm(actx_factory, knl, local_expn_class, mpole_expn_class, knl, local_expn_class)() tree_indep = SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl, m2l_translation=m2l_translation), target_kernels) @@ -237,9 +237,8 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P - p2p = 
P2P(actx.context, target_kernels, exclude_self=False) - evt, (ref_pot,) = p2p(actx.queue, targets, sources, (weights,), - **extra_kwargs) + p2p = P2P(actx, target_kernels, exclude_self=False) + evt, (ref_pot,) = p2p(actx, targets, sources, (weights,), **extra_kwargs) pot = actx.to_numpy(pot) ref_pot = actx.to_numpy(ref_pot) @@ -273,21 +272,19 @@ def test_coeff_magnitude_rscale(actx_factory, knl): from boxtree.tools import make_normal_particle_array as p_normal - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, nsources, knl.dim, dtype, seed=15) offset = np.zeros(knl.dim) offset[0] = 0.1 - targets = offset + p_normal(actx.queue, ntargets, knl.dim, dtype, seed=18) + targets = offset + p_normal(actx, ntargets, knl.dim, dtype, seed=18) from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) - - tree, _ = tb(actx.queue, sources, targets=targets, - max_particles_in_box=30, debug=True) + tb = TreeBuilder(actx) + tree, _ = tb(actx, sources, targets=targets, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) rng = np.random.default_rng(31) weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) @@ -306,7 +303,7 @@ def test_coeff_magnitude_rscale(actx_factory, knl): target_kernels = [knl] tree_indep = SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), target_kernels) @@ -361,22 +358,20 @@ def test_unified_single_and_double(actx_factory, visualize=False): from boxtree.tools import make_normal_particle_array as p_normal - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, nsources, knl.dim, dtype, seed=15) offset = np.zeros(knl.dim) offset[0] = 0.1 - targets = offset + p_normal(actx.queue, 
ntargets, knl.dim, dtype, seed=18) + targets = offset + p_normal(actx, ntargets, knl.dim, dtype, seed=18) del offset from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) - - tree, _ = tb(actx.queue, sources, targets=targets, - max_particles_in_box=30, debug=True) + tb = TreeBuilder(actx) + tree, _ = tb(actx, sources, targets=targets, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) rng = np.random.default_rng(44) weights = ( @@ -406,7 +401,7 @@ def test_unified_single_and_double(actx_factory, visualize=False): if deriv_knl in source_kernels: source_extra_kwargs["dir_vec"] = dir_vec tree_indep = SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), target_kernels=target_kernels, source_kernels=source_kernels, @@ -454,16 +449,15 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory, use_fft, visualize=False) mpole_expn_class = VolumeTaylorMultipoleExpansion order = 1 - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, nsources, knl.dim, dtype, seed=15) from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) - - tree, _ = tb(actx.queue, sources, max_particles_in_box=30, debug=True) + tb = TreeBuilder(actx) + tree, _ = tb(actx, sources, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) rng = np.random.default_rng(44) weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) @@ -481,7 +475,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory, use_fft, visualize=False) knl, local_expn_class)() tree_indep = 
SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl, m2l_translation=m2l_translation), target_kernels) @@ -513,16 +507,16 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): mpole_expn_class = VolumeTaylorMultipoleExpansion order = 10 - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, nsources, knl.dim, dtype, seed=15) from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) + tb = TreeBuilder(actx) - tree, _ = tb(actx.queue, sources, max_particles_in_box=30, debug=True) + tree, _ = tb(actx, sources, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) rng = np.random.default_rng(44) weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) @@ -533,7 +527,7 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): target_kernels = [knl] tree_indep = SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), target_kernels, @@ -548,9 +542,8 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): pot, = drive_fmm(wrangler, (weights,)) from sumpy import P2P - p2p = P2P(actx.context, target_kernels, exclude_self=True) - evt, (ref_pot,) = p2p(actx.queue, sources, sources, (weights,), - **self_extra_kwargs) + p2p = P2P(actx, target_kernels, exclude_self=True) + evt, (ref_pot,) = p2p(actx, sources, sources, (weights,), **self_extra_kwargs) pot = actx.to_numpy(pot) ref_pot = actx.to_numpy(ref_pot) @@ -581,17 +574,15 @@ def test_sumpy_axis_source_derivative(actx_factory, visualize=False): mpole_expn_class = VolumeTaylorMultipoleExpansion order = 10 - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, 
nsources, knl.dim, dtype, seed=15) from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) - - tree, _ = tb(actx.queue, sources, - max_particles_in_box=30, debug=True) + tb = TreeBuilder(actx) + tree, _ = tb(actx, sources, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) rng = np.random.default_rng(12) weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) @@ -606,7 +597,7 @@ def test_sumpy_axis_source_derivative(actx_factory, visualize=False): (AxisTargetDerivative(0, knl), knl), (knl, AxisSourceDerivative(0, knl))]: tree_indep = SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), partial(local_expn_class, knl), target_kernels=[tgt_knl], @@ -649,17 +640,17 @@ def test_sumpy_target_point_multiplier(actx_factory, deriv_axes, visualize=False mpole_expn_class = VolumeTaylorMultipoleExpansion order = 5 - sources = p_normal(actx.queue, nsources, knl.dim, dtype, seed=15) + sources = p_normal(actx, nsources, knl.dim, dtype, seed=15) from boxtree import TreeBuilder - tb = TreeBuilder(actx.context) + tb = TreeBuilder(actx) - tree, _ = tb(actx.queue, sources, + tree, _ = tb(actx, sources, max_particles_in_box=30, debug=True) from boxtree.traversal import FMMTraversalBuilder - tbuild = FMMTraversalBuilder(actx.context) - trav, _ = tbuild(actx.queue, tree, debug=True) + tbuild = FMMTraversalBuilder(actx) + trav, _ = tbuild(actx, tree, debug=True) rng = np.random.default_rng(12) weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) @@ -675,7 +666,7 @@ def test_sumpy_target_point_multiplier(actx_factory, deriv_axes, visualize=False tgt_knls[1] = AxisTargetDerivative(axis, tgt_knls[1]) tree_indep = SumpyTreeIndependentDataForWrangler( - actx.context, + actx, partial(mpole_expn_class, knl), 
partial(local_expn_class, knl), target_kernels=tgt_knls, diff --git a/test/test_kernels.py b/test/test_kernels.py index bf03f00d4..fdd65b865 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -69,7 +69,7 @@ def test_p2p(actx_factory, exclude_self): from sumpy.p2p import P2P lknl = LaplaceKernel(dimensions) - knl = P2P(actx.context, + knl = P2P(actx, [lknl, AxisTargetDerivative(0, lknl)], exclude_self=exclude_self) @@ -85,7 +85,7 @@ def test_p2p(actx_factory, exclude_self): extra_kwargs["target_to_source"] = np.arange(n, dtype=np.int32) evt, (potential, x_derivative) = knl( - actx.queue, targets, sources, [strengths], + actx, targets, sources, [strengths], out_host=True, **extra_kwargs) potential_ref = np.empty_like(potential) @@ -174,11 +174,11 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): rscale = 0.5 # pick something non-1 # apply p2e at the same time - p2e = P2EFromSingleBox(actx.context, expn, + p2e = P2EFromSingleBox(actx, expn, kernels=source_kernels, strength_usage=[0, 1]) - evt, (mpoles,) = p2e(actx.queue, + evt, (mpoles,) = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, @@ -202,10 +202,10 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): if isinstance(source_kernel, DirectionalSourceDerivative): extra_source_kwargs["dir_vec"] = dir_vec - p2e = P2EFromSingleBox(actx.context, expn, + p2e = P2EFromSingleBox(actx, expn, kernels=[source_kernel], strength_usage=[i]) - evt, (mpoles,) = p2e(actx.queue, + evt, (mpoles,) = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, @@ -286,9 +286,9 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative expn = expn_class(knl, order=order) from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P - p2e = P2EFromSingleBox(actx.context, expn, kernels=[knl]) - e2p = E2PFromSingleBox(actx.context, expn, 
kernels=target_kernels) - p2p = P2P(actx.context, target_kernels, exclude_self=False) + p2e = P2EFromSingleBox(actx, expn, kernels=[knl]) + e2p = E2PFromSingleBox(actx, expn, kernels=target_kernels) + p2p = P2P(actx, target_kernels, exclude_self=False) from pytools.convergence import EOCRecorder eoc_rec_pot = EOCRecorder() @@ -338,7 +338,7 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative # {{{ apply p2e - evt, (mpoles,) = p2e(actx.queue, + evt, (mpoles,) = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, @@ -361,7 +361,7 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative box_target_counts_nonchild = np.array([ntargets], dtype=np.int32) evt, (pot, grad_x, ) = e2p( - actx.queue, + actx, src_expansions=mpoles, src_base_ibox=0, target_boxes=source_boxes, @@ -378,7 +378,7 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative # {{{ compute (direct) reference solution evt, (pot_direct, grad_x_direct, ) = p2p( - actx.queue, + actx, targets, sources, (strengths,), out_host=True, **extra_source_kwargs) @@ -550,7 +550,7 @@ def eval_at(e2p, source_box_nr, rscale): e2p_box_target_counts_nonchild[source_box_nr] = ntargets evt, (pot,) = e2p( - actx.queue, + actx, src_expansions=mpoles, src_base_ibox=0, @@ -576,13 +576,13 @@ def eval_at(e2p, source_box_nr, rscale): l_expn = local_expn_class(knl, order=order, m2l_translation=m2l_translation) from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR - p2m = P2EFromSingleBox(actx.context, m_expn) - m2m = E2EFromCSR(actx.context, m_expn, m_expn) - m2p = E2PFromSingleBox(actx.context, m_expn, target_kernels) - m2l = E2EFromCSR(actx.context, m_expn, l_expn) - l2l = E2EFromCSR(actx.context, l_expn, l_expn) - l2p = E2PFromSingleBox(actx.context, l_expn, target_kernels) - p2p = P2P(actx.context, target_kernels, exclude_self=False) + p2m = 
P2EFromSingleBox(actx, m_expn) + m2m = E2EFromCSR(actx, m_expn, m_expn) + m2p = E2PFromSingleBox(actx, m_expn, target_kernels) + m2l = E2EFromCSR(actx, m_expn, l_expn) + l2l = E2EFromCSR(actx, l_expn, l_expn) + l2p = E2PFromSingleBox(actx, l_expn, target_kernels) + p2p = P2P(actx, target_kernels, exclude_self=False) fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res) targets = fp.points @@ -590,7 +590,7 @@ def eval_at(e2p, source_box_nr, rscale): # {{{ compute (direct) reference solution evt, (pot_direct,) = p2p( - actx.queue, + actx, targets, sources, (strengths,), out_host=True, **extra_kwargs) @@ -610,7 +610,7 @@ def eval_at(e2p, source_box_nr, rscale): p2m_box_source_counts_nonchild = np.array([nsources, 0, 0, 0], dtype=np.int32) - evt, (mpoles,) = p2m(actx.queue, + evt, (mpoles,) = p2m(actx, source_boxes=p2m_source_boxes, box_source_starts=p2m_box_source_starts, box_source_counts_nonchild=p2m_box_source_counts_nonchild, @@ -641,7 +641,7 @@ def eval_at(e2p, source_box_nr, rscale): m2m_src_box_starts = np.array([0, 1], dtype=np.int32) m2m_src_box_lists = np.array([0], dtype=np.int32) - evt, (mpoles,) = m2m(actx.queue, + evt, (mpoles,) = m2m(actx, src_expansions=mpoles, src_base_ibox=0, tgt_base_ibox=0, @@ -673,7 +673,7 @@ def eval_at(e2p, source_box_nr, rscale): m2l_src_box_starts = np.array([0, 1], dtype=np.int32) m2l_src_box_lists = np.array([1], dtype=np.int32) - evt, (mpoles,) = m2l(actx.queue, + evt, (mpoles,) = m2l(actx, src_expansions=mpoles, src_base_ibox=0, tgt_base_ibox=0, @@ -704,7 +704,7 @@ def eval_at(e2p, source_box_nr, rscale): l2l_src_box_starts = np.array([0, 1], dtype=np.int32) l2l_src_box_lists = np.array([2], dtype=np.int32) - evt, (mpoles,) = l2l(actx.queue, + evt, (mpoles,) = l2l(actx, src_expansions=mpoles, src_base_ibox=0, tgt_base_ibox=0, @@ -917,7 +917,7 @@ def test_m2m_compressed_error_helmholtz(actx_factory, dim, order): for i, (mpole_expn_class, local_expn_class) in \ enumerate(zip(mpole_expn_classes, local_expn_classes)): 
tctx = toys.ToyContext( - actx.context, + actx, knl, extra_kernel_kwargs=extra_kernel_kwargs, local_expn_class=local_expn_class, diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index 4baf30312..c50981b44 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -111,17 +111,17 @@ def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False): expn = LineTaylorLocalExpansion(knl, order) from sumpy.qbx import LayerPotential - lpot = LayerPotential(actx.context, expansion=expn, source_kernels=(knl,), + lpot = LayerPotential(actx, expansion=expn, source_kernels=(knl,), target_kernels=(base_knl,)) from sumpy.qbx import LayerPotentialMatrixGenerator - mat_gen = LayerPotentialMatrixGenerator(actx.context, + mat_gen = LayerPotentialMatrixGenerator(actx, expansion=expn, source_kernels=(knl,), target_kernels=(base_knl,)) from sumpy.qbx import LayerPotentialMatrixSubsetGenerator - blk_gen = LayerPotentialMatrixSubsetGenerator(actx.context, + blk_gen = LayerPotentialMatrixSubsetGenerator(actx, expansion=expn, source_kernels=(knl,), target_kernels=(base_knl,)) @@ -142,7 +142,7 @@ def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False): actx.from_numpy(make_obj_array(np.ones((ndim, n)))) ) - _, (result_lpot,) = lpot(actx.queue, + _, (result_lpot,) = lpot(actx, targets=targets, sources=sources, centers=centers, @@ -150,7 +150,7 @@ def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False): strengths=strengths, **extra_kwargs) result_lpot = actx.to_numpy(result_lpot) - _, (mat,) = mat_gen(actx.queue, + _, (mat,) = mat_gen(actx, targets=targets, sources=sources, centers=centers, @@ -158,7 +158,7 @@ def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False): mat = actx.to_numpy(mat) result_mat = mat @ actx.to_numpy(strengths[0]) - _, (blk,) = blk_gen(actx.queue, + _, (blk,) = blk_gen(actx, targets=targets, sources=sources, centers=centers, @@ -201,14 +201,13 @@ def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, 
visualize=False raise ValueError(f"unknown lpot_id: '{lpot_id}'") from sumpy.p2p import P2P - lpot = P2P(actx.context, [lknl], exclude_self=exclude_self) + lpot = P2P(actx, [lknl], exclude_self=exclude_self) from sumpy.p2p import P2PMatrixGenerator - mat_gen = P2PMatrixGenerator(actx.context, [lknl], exclude_self=exclude_self) + mat_gen = P2PMatrixGenerator(actx, [lknl], exclude_self=exclude_self) from sumpy.p2p import P2PMatrixSubsetGenerator - blk_gen = P2PMatrixSubsetGenerator( - actx.context, [lknl], exclude_self=exclude_self) + blk_gen = P2PMatrixSubsetGenerator(actx, [lknl], exclude_self=exclude_self) for n in [200, 300, 400]: targets, sources, _, _, sigma = ( @@ -229,19 +228,19 @@ def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, visualize=False extra_kwargs["dsource_vec"] = ( actx.from_numpy(make_obj_array(np.ones((ndim, n))))) - _, (result_lpot,) = lpot(actx.queue, + _, (result_lpot,) = lpot(actx, targets=targets, sources=sources, strength=strengths, **extra_kwargs) result_lpot = actx.to_numpy(result_lpot) - _, (mat,) = mat_gen(actx.queue, + _, (mat,) = mat_gen(actx, targets=targets, sources=sources, **extra_kwargs) mat = actx.to_numpy(mat) result_mat = mat @ actx.to_numpy(strengths[0]) - _, (blk,) = blk_gen(actx.queue, + _, (blk,) = blk_gen(actx, targets=targets, sources=sources, tgtindices=tgtindices, diff --git a/test/test_misc.py b/test/test_misc.py index 5032e2fc9..0e1cd66e2 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -112,7 +112,7 @@ def test_pde_check_kernels(actx_factory, knl_info, order=5): actx = actx_factory() dim = knl_info.kernel.dim - tctx = t.ToyContext(actx.context, knl_info.kernel, + tctx = t.ToyContext(actx, knl_info.kernel, extra_source_kwargs=knl_info.extra_kwargs) rng = np.random.default_rng(42) @@ -297,7 +297,7 @@ def test_toy_p2e2e2p(actx_factory, case): from sumpy.expansion import VolumeTaylorExpansionFactory actx = actx_factory() - ctx = t.ToyContext(actx.context, + ctx = t.ToyContext(actx, 
LaplaceKernel(dim), expansion_factory=VolumeTaylorExpansionFactory()) diff --git a/test/test_qbx.py b/test/test_qbx.py index c2b61c3b5..c38af078b 100644 --- a/test/test_qbx.py +++ b/test/test_qbx.py @@ -63,7 +63,7 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False): from sumpy.qbx import LayerPotential - lpot = LayerPotential(actx.context, + lpot = LayerPotential(actx, expansion=expn_class(lknl, order), target_kernels=(lknl,), source_kernels=(lknl,)) @@ -96,7 +96,7 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False): strengths = (sigma * h,) evt, (result_qbx,) = lpot( - actx.queue, + actx, targets, sources, centers, strengths, expansion_radii=expansion_radii) @@ -133,9 +133,9 @@ def test_direct_qbx_vs_eigval_with_tgt_deriv( from sumpy.qbx import LayerPotential - lpot_dx = LayerPotential(actx.context, expansion=expn_class(lknl, order), + lpot_dx = LayerPotential(actx, expansion=expn_class(lknl, order), target_kernels=(AxisTargetDerivative(0, lknl),), source_kernels=(lknl,)) - lpot_dy = LayerPotential(actx.context, expansion=expn_class(lknl, order), + lpot_dy = LayerPotential(actx, expansion=expn_class(lknl, order), target_kernels=(AxisTargetDerivative(1, lknl),), source_kernels=(lknl,)) mode_nr = 15 @@ -167,10 +167,10 @@ def test_direct_qbx_vs_eigval_with_tgt_deriv( strengths = (sigma * h,) - evt, (result_qbx_dx,) = lpot_dx(actx.queue, + evt, (result_qbx_dx,) = lpot_dx(actx, targets, sources, centers, strengths, expansion_radii=expansion_radii) - evt, (result_qbx_dy,) = lpot_dy(actx.queue, + evt, (result_qbx_dy,) = lpot_dy(actx, targets, sources, centers, strengths, expansion_radii=expansion_radii) diff --git a/test/test_tools.py b/test/test_tools.py index 09c88b78e..6cc38b5d4 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -90,7 +90,7 @@ def test_fft(actx_factory, size): out = fft(inp) fft_func = loopy_fft(inp.shape, inverse=False, complex_dtype=inp.dtype.type) - evt, (out_dev,) = fft_func(actx.queue, 
y=inp_dev) + evt, (out_dev,) = fft_func(actx, y=inp_dev) assert np.allclose(actx.to_numpy(out_dev), out) From 3123ec7d7805b16948ecd4a06e9aa14b2cc2bc49 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:17:19 +0300 Subject: [PATCH 03/59] sumpy.array_context additions --- sumpy/array_context.py | 51 +++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 760f7d5d8..6463e1a79 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -32,18 +32,49 @@ # {{{ PyOpenCLArrayContext -class PyOpenCLArrayContext(PyOpenCLArrayContextBase): - def transform_loopy_program(self, t_unit): - default_ep = t_unit.default_entrypoint - options = default_ep.options +def make_loopy_program( + domains, statements, + kernel_data=None, *, + name="sumpy_loopy_kernel", + silenced_warnings=None, + index_dtype=None, + tags=None): + """Return a :class:`loopy.LoopKernel` suitable for use with + :meth:`ArrayContext.call_loopy`. + """ + if kernel_data is None: + kernel_data = [...] + + if silenced_warnings is None: + silenced_warnings = [] + + import loopy as lp + from arraycontext.loopy import _DEFAULT_LOOPY_OPTIONS + + return lp.make_kernel( + domains, + statements, + kernel_data=kernel_data, + options=_DEFAULT_LOOPY_OPTIONS, + default_offset=lp.auto, + name=name, + lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, + silenced_warnings=silenced_warnings, + index_dtype=index_dtype, + tags=tags) - if not (options.return_dict and options.no_numpy): - raise ValueError("Loopy kernel passed to call_loopy must " - "have return_dict and no_numpy options set. 
" - "Did you use arraycontext.make_loopy_program " - "to create this kernel?") - return super().transform_loopy_program(t_unit) +class PyOpenCLArrayContext(PyOpenCLArrayContextBase): + def transform_loopy_program(self, t_unit): + for name in t_unit.entrypoints: + options = t_unit[name].options + if not (options.return_dict and options.no_numpy): + raise ValueError( + f"loopy kernel '{name}' passed to call_loopy must " + "have 'return_dict' and 'no_numpy' options set. " + "Did you use 'make_loopy_program' to create this kernel?") + + return t_unit # }}} From ecc9d7f4ef2d36d7f8ce8a3d5a921c251fdfc453 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:17:55 +0300 Subject: [PATCH 04/59] port p2p to arraycontext --- sumpy/p2p.py | 129 ++++++++++++++++++++++++++++----------------------- 1 file changed, 71 insertions(+), 58 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 597ec337d..e92689bde 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -25,10 +25,12 @@ import numpy as np import loopy as lp -from loopy.version import MOST_RECENT_LANGUAGE_VERSION -from sumpy.tools import ( - KernelComputation, KernelCacheWrapper, is_obj_array_like) +from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.tools import KernelComputation, KernelCacheMixin, is_obj_array_like + +import logging +logger = logging.getLogger(__name__) __doc__ = """ @@ -50,7 +52,7 @@ # {{{ p2p base class -class P2PBase(KernelComputation, KernelCacheWrapper): +class P2PBase(KernelCacheMixin, KernelComputation): def __init__(self, target_kernels, exclude_self, strength_usage=None, value_dtypes=None, name=None, source_kernels=None): """ @@ -64,8 +66,9 @@ def __init__(self, target_kernels, exclude_self, strength_usage=None, Default: all kernels use the same strength. 
""" from pytools import single_valued - from sumpy.kernel import (TargetTransformationRemover, - SourceTransformationRemover) + from sumpy.kernel import ( + TargetTransformationRemover, SourceTransformationRemover) + txr = TargetTransformationRemover() sxr = SourceTransformationRemover() @@ -87,9 +90,9 @@ def __init__(self, target_kernels, exclude_self, strength_usage=None, value_dtypes=value_dtypes, name=name) self.exclude_self = exclude_self - - self.dim = single_valued(knl.dim for knl in - list(self.target_kernels) + list(self.source_kernels)) + self.dim = single_valued([ + knl.dim for knl in self.target_kernels + self.source_kernels + ]) def get_cache_key(self): return (type(self).__name__, tuple(self.target_kernels), self.exclude_self, @@ -213,7 +216,7 @@ def get_kernel(self): shape="nresults, ntargets", dim_tags="sep,C") ]) - loopy_knl = lp.make_kernel([""" + loopy_knl = make_loopy_program([""" {[itgt, isrc, idim]: \ 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ @@ -232,16 +235,15 @@ def get_kernel(self): simul_reduce(sum, isrc, pair_result_{iknl}) {{inames=itgt}} """ for iknl in range(len(self.target_kernels))] + ["end"], - arguments, - assumptions="nsources>=1 and ntargets>=1", + kernel_data=arguments, name=self.name, - default_offset=lp.auto, - fixed_parameters=dict( - dim=self.dim, - nstrengths=self.strength_count, - nresults=len(self.target_kernels)), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + loopy_knl = lp.assume(loopy_knl, "nsources>=1 and ntargets>=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + nstrengths=self.strength_count, + nresults=len(self.target_kernels)) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.target_kernels + self.source_kernels: @@ -249,13 +251,16 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, targets, sources, strength, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, + targets, sources, strength, **kwargs): knl = self.get_cached_optimized_kernel( 
- targets_is_obj_array=is_obj_array_like(targets), - sources_is_obj_array=is_obj_array_like(sources)) + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources)) - return knl(queue, sources=sources, targets=targets, strength=strength, - **kwargs) + return actx.call_loopy( + knl, + sources=sources, targets=targets, strength=strength, + **kwargs) # }}} @@ -279,7 +284,7 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes) ]) - loopy_knl = lp.make_kernel([""" + loopy_knl = make_loopy_program([""" {[itgt, isrc, idim]: \ 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ @@ -297,11 +302,11 @@ def get_kernel(self): """ for iknl in range(len(self.target_kernels))] + ["end"], arguments, - assumptions="nsources>=1 and ntargets>=1", name=self.name, - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + loopy_knl = lp.assume(loopy_knl, "nsources>=1 and ntargets>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.target_kernels + self.source_kernels: @@ -309,12 +314,12 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, targets, sources, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, targets, sources, **kwargs): knl = self.get_cached_optimized_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - return knl(queue, sources=sources, targets=targets, **kwargs) + return actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) # }}} @@ -349,7 +354,7 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes) ]) - loopy_knl = lp.make_kernel( + loopy_knl = make_loopy_program( "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}", self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement @@ -373,11 +378,12 @@ def get_kernel(self): """ for iknl in 
range(len(self.target_kernels))] + ["end"], arguments, - assumptions="nresult>=1", silenced_warnings="write_race(write_p2p*)", name=self.name, - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "nresult>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.add_dtypes(loopy_knl, @@ -403,7 +409,8 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): enforce_variable_access_ordered="no_check") return knl - def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, + targets, sources, tgtindices, srcindices, **kwargs): """Evaluate a subset of the P2P matrix interactions. :arg targets: target point coordinates, which can be an object @@ -423,11 +430,12 @@ def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs): targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - return knl(queue, - targets=targets, - sources=sources, - tgtindices=tgtindices, - srcindices=srcindices, **kwargs) + return actx.call_loopy( + knl, + targets=targets, + sources=sources, + tgtindices=tgtindices, + srcindices=srcindices, **kwargs) # }}} @@ -437,8 +445,9 @@ def __call__(self, queue, targets, sources, tgtindices, srcindices, **kwargs): class P2PFromCSR(P2PBase): default_name = "p2p_from_csr" - def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box, - gpu=False, nsplit=32): + def get_kernel(self, + max_nsources_in_one_box: int, max_ntargets_in_one_box: int, *, + gpu: bool = False, nsplit: int = 32): loopy_insns, result_names = self.get_loopy_insns_and_result_names() arguments = self.get_default_src_tgt_arguments() \ + [ @@ -459,7 +468,7 @@ def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box, lp.GlobalArg("result", None, shape="noutputs, ntargets", 
dim_tags="sep,C"), lp.TemporaryVariable("tgt_center", shape=(self.dim,)), - "..." + ... ] domains = [ @@ -606,22 +615,25 @@ def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box, end """]) - loopy_knl = lp.make_kernel( + loopy_knl = make_loopy_program( domains, instructions, arguments, - assumptions="ntgt_boxes>=1", name=self.name, - silenced_warnings=["write_race(write_csr*)", "write_race(prefetch_src)", + silenced_warnings=[ + "write_race(write_csr*)", + "write_race(prefetch_src)", "write_race(prefetch_charge)"], - fixed_parameters=dict( - dim=self.dim, - nstrengths=self.strength_count, - nsplit=nsplit, - src_outer_limit=src_outer_limit, - tgt_outer_limit=tgt_outer_limit, - noutputs=len(self.target_kernels)), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + nstrengths=self.strength_count, + nsplit=nsplit, + src_outer_limit=src_outer_limit, + tgt_outer_limit=tgt_outer_limit, + noutputs=len(self.target_kernels)) loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) @@ -636,8 +648,9 @@ def get_kernel(self, max_nsources_in_one_box, max_ntargets_in_one_box, return loopy_knl - def get_optimized_kernel(self, max_nsources_in_one_box, - max_ntargets_in_one_box, is_cpu): + def get_optimized_kernel(self, + max_nsources_in_one_box: int, max_ntargets_in_one_box: int, + is_cpu: bool): if is_cpu: knl = self.get_kernel(max_nsources_in_one_box, max_ntargets_in_one_box, gpu=False) @@ -657,16 +670,16 @@ def get_optimized_kernel(self, max_nsources_in_one_box, return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): import pyopencl as cl max_nsources_in_one_box = kwargs.pop("max_nsources_in_one_box") max_ntargets_in_one_box = kwargs.pop("max_ntargets_in_one_box") knl = self.get_cached_optimized_kernel( max_nsources_in_one_box=max_nsources_in_one_box, 
max_ntargets_in_one_box=max_ntargets_in_one_box, - is_cpu=queue.dev.type & cl.device_type.CPU) + is_cpu=actx.queue.dev.type & cl.device_type.CPU) - return knl(queue, **kwargs) + return actx.call_loopy(knl, **kwargs) # }}} From 2285243fbdbb2d0713c236715d19e87e31ccfd24 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:18:12 +0300 Subject: [PATCH 05/59] port p2e to arraycontext --- sumpy/p2e.py | 58 ++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 3fd071b53..b01b255a2 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -22,9 +22,9 @@ import numpy as np import loopy as lp -from loopy.version import MOST_RECENT_LANGUAGE_VERSION -from sumpy.tools import KernelCacheWrapper, KernelComputation +from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.tools import KernelCacheMixin, KernelComputation import logging logger = logging.getLogger(__name__) @@ -43,14 +43,13 @@ # {{{ P2E base class -class P2EBase(KernelComputation, KernelCacheWrapper): +class P2EBase(KernelCacheMixin, KernelComputation): """Common input processing for kernel computations. .. automethod:: __init__ """ - def __init__(self, expansion, kernels=None, - name=None, strength_usage=None): + def __init__(self, expansion, kernels=None, name=None, strength_usage=None): """ :arg expansion: a subclass of :class:`sumpy.expansion.ExpansionBase` :arg kernels: if not provided, the kernel of the *expansion* is used. 
@@ -135,11 +134,11 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): enforce_variable_access_ordered="no_check") return knl - def __call__(self, actx: ArrayContext, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): from sumpy.tools import is_obj_array_like sources = kwargs.pop("sources") centers = kwargs.pop("centers") - knl = self.get_kernel( + knl = self.get_cached_optimized_kernel( sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) @@ -148,7 +147,10 @@ def __call__(self, actx: ArrayContext, **kwargs): dtype = centers[0].dtype if is_obj_array_like(centers) else centers.dtype rscale = dtype.type(kwargs.pop("rscale")) - return actx.call_loopy(knl, sources=sources, centers=centers, rscale=rscale, **kwargs) + return actx.call_loopy( + knl, + sources=sources, centers=centers, rscale=rscale, + **kwargs) # }}} @@ -166,7 +168,7 @@ def get_kernel(self): ncoeffs = len(self.expansion) from sumpy.tools import gather_loopy_source_arguments - loopy_knl = lp.make_kernel([ + loopy_knl = make_loopy_program([ "{[isrc_box]: 0 <= isrc_box < nsrc_boxes}", "{[isrc, idim]: isrc_start <= isrc < isrc_end and 0 <= idim < dim}", ], [""" @@ -208,16 +210,17 @@ def get_kernel(self): ] + gather_loopy_source_arguments( self.source_kernels + (self.expansion,)), name=self.name, - assumptions="nsrc_boxes>=1", silenced_warnings="write_race(write_expn*)", - default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim, - strength_count=self.strength_count), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "nsrc_boxes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + strength_count=self.strength_count) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl @@ -230,7 +233,7 @@ def get_optimized_kernel(self, 
sources_is_obj_array, centers_is_obj_array): knl = lp.split_iname(knl, "isrc_box", 16, outer_tag="g.0") return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg source_boxes: an array of integer indices into *box_source_starts* and *box_source_counts_nonchild*. @@ -250,7 +253,7 @@ def __call__(self, queue, **kwargs): :returns: an array of *tgt_expansions*. """ - return super().__call__(queue, **kwargs) + return super().__call__(actx, **kwargs) # }}} @@ -288,7 +291,7 @@ def get_kernel(self): ] + gather_loopy_source_arguments( self.source_kernels + (self.expansion,))) - loopy_knl = lp.make_kernel( + loopy_knl = make_loopy_program( [ "{[itgt_box]: 0 <= itgt_box < ntgt_boxes}", "{[isrc_box]: isrc_box_start <= isrc_box < isrc_box_stop}", @@ -325,18 +328,19 @@ def get_kernel(self): """ for coeffidx in range(ncoeffs)] + [""" end """], - arguments, + kernel_data=arguments, name=self.name, - assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_expn*)", - default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim, - strength_count=self.strength_count), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + strength_count=self.strength_count) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl @@ -349,7 +353,7 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg target_boxes: array of integer indices into *source_box_starts* and *centers*. 
@@ -368,7 +372,7 @@ def __call__(self, queue, **kwargs): :arg tgt_base_ibox: see :meth:`P2EFromSingleBox.__call__`. :arg tgt_expansion: see :meth:`P2EFromSingleBox.__call__`. """ - return super().__call__(queue, **kwargs) + return super().__call__(actx, **kwargs) # }}} From c87c528638d88a1b0d1ac3133f93c3c85fcbacda Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:18:21 +0300 Subject: [PATCH 06/59] port e2p to arraycontext --- sumpy/e2p.py | 85 ++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index a229b8931..e7694b912 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -22,10 +22,9 @@ import numpy as np import loopy as lp -import sumpy.symbolic as sym -from sumpy.tools import KernelCacheWrapper -from loopy.version import MOST_RECENT_LANGUAGE_VERSION +from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.tools import KernelCacheMixin __doc__ = """ @@ -42,7 +41,7 @@ # {{{ E2P base class -class E2PBase(KernelCacheWrapper): +class E2PBase(KernelCacheMixin): def __init__(self, expansion, kernels, name=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` @@ -52,13 +51,12 @@ def __init__(self, expansion, kernels, name=None): Default: all kernels use the same strength. 
""" - from sumpy.kernel import (SourceTransformationRemover, - TargetTransformationRemover) + from sumpy.kernel import ( + SourceTransformationRemover, TargetTransformationRemover) sxr = SourceTransformationRemover() txr = TargetTransformationRemover() - expansion = expansion.with_kernel( - sxr(expansion.kernel)) + expansion = expansion.with_kernel(sxr(expansion.kernel)) kernels = [sxr(knl) for knl in kernels] for knl in kernels: assert txr(knl) == expansion.kernel @@ -70,8 +68,8 @@ def __init__(self, expansion, kernels, name=None): self.dim = expansion.dim def get_loopy_insns_and_result_names(self): - from sumpy.symbolic import make_sym_vector - bvec = make_sym_vector("b", self.dim) + import sumpy.symbolic as sym + bvec = sym.make_sym_vector("b", self.dim) import sumpy.symbolic as sp rscale = sp.Symbol("rscale") @@ -131,11 +129,10 @@ def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, nresults=len(result_names)) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") + for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -202,7 +200,7 @@ def get_optimized_kernel(self): return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg expansions: :arg target_boxes: @@ -211,14 +209,15 @@ def __call__(self, queue, **kwargs): :arg centers: :arg targets: """ - knl = self.get_cached_optimized_kernel() centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type # meaningfully inferred. Make the type of rscale explicit. 
rscale = centers.dtype.type(kwargs.pop("rscale")) - return knl(queue, centers=centers, rscale=rscale, **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(), + centers=centers, rscale=rscale, **kwargs) # }}} @@ -233,13 +232,12 @@ def get_kernel(self): loopy_insns, result_names = self.get_loopy_insns_and_result_names() - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + nresults=len(result_names)) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.prioritize_loops(loopy_knl, "itgt_box,itgt,isrc_box") + for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -316,15 +315,15 @@ def get_optimized_kernel(self): enforce_variable_access_ordered="no_check") return knl - def __call__(self, queue, **kwargs): - knl = self.get_cached_optimized_kernel() - + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type # meaningfully inferred. Make the type of rscale explicit. 
rscale = centers.dtype.type(kwargs.pop("rscale")) - return knl(queue, centers=centers, rscale=rscale, **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(), + centers=centers, rscale=rscale, **kwargs) # }}} From 2aa656bfd809779047cd8513d25a9ab9fec10094 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:18:30 +0300 Subject: [PATCH 07/59] port e2e to arraycontext --- sumpy/e2e.py | 318 ++++++++++++++++++++++++--------------------------- 1 file changed, 150 insertions(+), 168 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index b005f1ced..3609d358a 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -22,12 +22,10 @@ import numpy as np import loopy as lp -import sumpy.symbolic as sym -import pymbolic -from loopy.version import MOST_RECENT_LANGUAGE_VERSION -from sumpy.tools import KernelCacheWrapper, to_complex_dtype from pytools import memoize_method +from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.tools import KernelCacheMixin, to_complex_dtype import logging logger = logging.getLogger(__name__) @@ -48,9 +46,8 @@ # {{{ translation base class -class E2EBase(KernelCacheWrapper): - def __init__(self, src_expansion, tgt_expansion, - name=None): +class E2EBase(KernelCacheMixin): + def __init__(self, src_expansion, tgt_expansion, name=None): """ :arg expansion: a subclass of :class:`sympy.expansion.ExpansionBase` :arg strength_usage: A list of integers indicating which expression @@ -58,24 +55,19 @@ def __init__(self, src_expansion, tgt_expansion, number of strength arrays that need to be passed. Default: all kernels use the same strength. 
""" + from sumpy.kernel import ( + TargetTransformationRemover, SourceTransformationRemover) + txr = TargetTransformationRemover() + sxr = SourceTransformationRemover() if src_expansion is tgt_expansion: - from sumpy.kernel import (TargetTransformationRemover, - SourceTransformationRemover) - tgt_expansion = src_expansion = src_expansion.with_kernel( - SourceTransformationRemover()( - TargetTransformationRemover()(src_expansion.kernel))) - + tgt_expansion = src_expansion = ( + src_expansion.with_kernel(sxr(txr(src_expansion.kernel)))) else: - - from sumpy.kernel import (TargetTransformationRemover, - SourceTransformationRemover) - src_expansion = src_expansion.with_kernel( - SourceTransformationRemover()( - TargetTransformationRemover()(src_expansion.kernel))) - tgt_expansion = tgt_expansion.with_kernel( - SourceTransformationRemover()( - TargetTransformationRemover()(tgt_expansion.kernel))) + src_expansion = ( + src_expansion.with_kernel(sxr(txr(src_expansion.kernel)))) + tgt_expansion = ( + tgt_expansion.with_kernel(sxr(txr(tgt_expansion.kernel)))) self.src_expansion = src_expansion self.tgt_expansion = tgt_expansion @@ -89,8 +81,8 @@ def __init__(self, src_expansion, tgt_expansion, @memoize_method def get_translation_loopy_insns(self): - from sumpy.symbolic import make_sym_vector - dvec = make_sym_vector("d", self.dim) + import sumpy.symbolic as sym + dvec = sym.make_sym_vector("d", self.dim) src_coeff_exprs = [ sym.Symbol(f"src_coeff{i}") @@ -119,11 +111,7 @@ def get_translation_loopy_insns(self): ) def get_cache_key(self): - return ( - type(self).__name__, - self.src_expansion, - self.tgt_expansion, - ) + return (type(self).__name__, self.src_expansion, self.tgt_expansion) def get_optimized_kernel(self): # FIXME @@ -145,8 +133,8 @@ class E2EFromCSR(E2EBase): default_name = "e2e_from_csr" def get_translation_loopy_insns(self): - from sumpy.symbolic import make_sym_vector - dvec = make_sym_vector("d", self.dim) + import sumpy.symbolic as sym + dvec = 
sym.make_sym_vector("d", self.dim) src_rscale = sym.Symbol("src_rscale") tgt_rscale = sym.Symbol("tgt_rscale") @@ -185,12 +173,11 @@ def get_kernel(self): # (same for itgt_box, tgt_ibox) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -219,7 +206,7 @@ def get_kernel(self): """ for coeffidx in range(ncoeff_tgt)] + [""" end """], - [ + kernel_data=[ lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), lp.ValueArg("src_rscale,tgt_rscale", None), lp.GlobalArg("src_box_starts, src_box_lists", @@ -232,24 +219,23 @@ def get_kernel(self): shape=("nsrc_level_boxes", ncoeff_src), offset=lp.auto), lp.GlobalArg("tgt_expansions", None, shape=("ntgt_level_boxes", ncoeff_tgt), offset=lp.auto), - "..." + ... ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_expn*)", - default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION ) - for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: - loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + return loopy_knl def get_optimized_kernel(self): @@ -259,7 +245,7 @@ def get_optimized_kernel(self): return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -274,12 +260,11 @@ def __call__(self, queue, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) - knl 
= self.get_cached_optimized_kernel() - - return knl(queue, - centers=centers, - src_rscale=src_rscale, tgt_rscale=tgt_rscale, - **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(), + centers=centers, + src_rscale=src_rscale, tgt_rscale=tgt_rscale, + **kwargs) class M2LUsingTranslationClassesDependentData(E2EFromCSR): @@ -290,8 +275,8 @@ class M2LUsingTranslationClassesDependentData(E2EFromCSR): default_name = "m2l_using_translation_classes_dependent_data" def get_translation_loopy_insns(self, result_dtype): - from sumpy.symbolic import make_sym_vector - dvec = make_sym_vector("d", self.dim) + import sumpy.symbolic as sym + dvec = sym.make_sym_vector("d", self.dim) src_rscale = sym.Symbol("src_rscale") tgt_rscale = sym.Symbol("tgt_rscale") @@ -349,11 +334,13 @@ def get_inner_loopy_kernel(self, result_dtype): ncoeff_src = len(self.src_expansion) ncoeff_tgt = len(self.tgt_expansion) + import pymbolic as prim + domains = [] insns = self.get_translation_loopy_insns(result_dtype) - tgt_coeffs = pymbolic.var("tgt_coeffs") + tgt_coeffs = prim.var("tgt_coeffs") for i in range(ncoeff_tgt): - expr = pymbolic.var(f"tgt_coeff{i}") + expr = prim.var(f"tgt_coeff{i}") insn = lp.Assignment(assignee=tgt_coeffs[i], expression=tgt_coeffs[i] + expr) insns.append(insn) @@ -396,13 +383,12 @@ def get_kernel(self, result_dtype): translation_knl = self.get_inner_loopy_kernel(result_dtype) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + m2l_translation_classes_dependent_ndata=( + m2l_translation_classes_dependent_ndata), + ncoeff_tgt=ncoeff_tgt, + ncoeff_src=ncoeff_src) + loopy_knl = lp.merge([translation_knl, loopy_knl]) loopy_knl = lp.inline_callable_kernel(loopy_knl, "e2e") loopy_knl = lp.add_dependency( @@ -494,7 +480,7 @@ def get_optimized_kernel(self, result_dtype): return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx, 
**kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -510,13 +496,12 @@ def __call__(self, queue, **kwargs): tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) src_expansions = kwargs.pop("src_expansions") - knl = self.get_cached_optimized_kernel(result_dtype=src_expansions.dtype) - - return knl(queue, - src_expansions=src_expansions, - centers=centers, - src_rscale=src_rscale, tgt_rscale=tgt_rscale, - **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(result_dtype=src_expansions.dtype), + src_expansions=src_expansions, + centers=centers, + src_rscale=src_rscale, tgt_rscale=tgt_rscale, + **kwargs) class M2LGenerateTranslationClassesDependentData(E2EBase): @@ -536,12 +521,11 @@ def get_kernel(self, result_dtype): self.tgt_expansion, self.src_expansion, result_dtype) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itr_class]: 0<=itr_class d[idim] = m2l_translation_vectors[idim, \ @@ -555,7 +539,7 @@ def get_kernel(self, result_dtype): ) {id=update,dep=set_d} end """], - [ + kernel_data=[ lp.ValueArg("src_rscale", None), lp.GlobalArg("m2l_translation_classes_dependent_data", None, shape=("ntranslation_classes", @@ -566,18 +550,17 @@ def get_kernel(self, result_dtype): lp.ValueArg("ntranslation_classes", np.int32), lp.ValueArg("ntranslation_vectors", np.int32), lp.ValueArg("translation_classes_level_start", np.int32), - "..." + ... 
] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - assumptions="ntranslation_classes>=1", - default_offset=lp.auto, - fixed_parameters=dict( - dim=self.dim, - m2l_translation_classes_dependent_ndata=( - m2l_translation_classes_dependent_ndata)), - lang_version=MOST_RECENT_LANGUAGE_VERSION ) + loopy_knl = lp.assume(loopy_knl, "ntranslation_classes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + m2l_translation_classes_dependent_ndata=( + m2l_translation_classes_dependent_ndata)) + for expr_knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expr_knl.prepare_loopy_kernel(loopy_knl) @@ -600,7 +583,7 @@ def get_optimized_kernel(self, result_dtype): return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg src_rscale: :arg translation_classes_level_start: @@ -617,14 +600,13 @@ def __call__(self, queue, **kwargs): "m2l_translation_classes_dependent_data") result_dtype = m2l_translation_classes_dependent_data.dtype - knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - - return knl(queue, - src_rscale=src_rscale, - m2l_translation_vectors=m2l_translation_vectors, - m2l_translation_classes_dependent_data=( - m2l_translation_classes_dependent_data), - **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(result_dtype=result_dtype), + src_rscale=src_rscale, + m2l_translation_vectors=m2l_translation_vectors, + m2l_translation_classes_dependent_data=( + m2l_translation_classes_dependent_data), + **kwargs) # }}} @@ -646,7 +628,7 @@ def get_kernel(self, result_dtype): self.tgt_expansion, self.src_expansion, result_dtype) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( + loopy_knl = make_loopy_program( [ "{[isrc_box]: 0<=isrc_box=1") + loopy_knl = lp.fix_parameters(loopy_knl, + nsrc_coeffs=nsrc_coeffs, + npreprocessed_src_coeffs=npreprocessed_src_coeffs) + for expn in 
[self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -696,17 +677,18 @@ def get_optimized_kernel(self, result_dtype): knl = lp.add_inames_for_unused_hw_axes(knl) return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg src_expansions :arg preprocessed_src_expansions """ preprocessed_src_expansions = kwargs.pop("preprocessed_src_expansions") result_dtype = preprocessed_src_expansions.dtype - knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - return knl(queue, - preprocessed_src_expansions=preprocessed_src_expansions, **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(result_dtype=result_dtype), + preprocessed_src_expansions=preprocessed_src_expansions, + **kwargs) # }}} @@ -729,11 +711,10 @@ def get_kernel(self, result_dtype): self.tgt_expansion, self.src_expansion, result_dtype) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box=1") + loopy_knl = lp.fix_parameters(loopy_knl, + dim=self.dim, + nsrc_coeffs=ntgt_coeffs_before_postprocessing, + ntgt_coeffs=ntgt_coeffs) + for expn in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -784,16 +763,18 @@ def get_optimized_kernel(self, result_dtype): knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") return knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg tgt_expansions :arg tgt_expansions_before_postprocessing """ tgt_expansions = kwargs.pop("tgt_expansions") result_dtype = tgt_expansions.dtype - knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - return knl(queue, tgt_expansions=tgt_expansions, **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(result_dtype=result_dtype), + tgt_expansions=tgt_expansions, + **kwargs) # }}} @@ -821,12 +802,11 @@ def 
get_kernel(self): for insn in self.get_translation_loopy_insns()] from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -859,7 +839,7 @@ def get_kernel(self): end end """], - [ + kernel_data=[ lp.GlobalArg("target_boxes", None, shape=lp.auto, offset=lp.auto), lp.GlobalArg("centers", None, shape="dim, aligned_nboxes"), @@ -873,13 +853,14 @@ def get_kernel(self): lp.ValueArg("src_base_ibox,tgt_base_ibox", np.int32), lp.ValueArg("ntgt_level_boxes,nsrc_level_boxes", np.int32), lp.ValueArg("aligned_nboxes", np.int32), - "..." + ... ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_expn*)", - fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nchildren=2**self.dim) for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -890,7 +871,7 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -899,7 +880,6 @@ def __call__(self, queue, **kwargs): :arg tgt_rscale: :arg centers: """ - knl = self.get_cached_optimized_kernel() centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type @@ -907,10 +887,11 @@ def __call__(self, queue, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) - return knl(queue, - centers=centers, - src_rscale=src_rscale, tgt_rscale=tgt_rscale, - **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(), + centers=centers, + src_rscale=src_rscale, tgt_rscale=tgt_rscale, + **kwargs) # }}} 
@@ -932,11 +913,10 @@ def get_kernel(self): # (same for itgt_box, tgt_ibox) from sumpy.tools import gather_loopy_arguments - loopy_knl = lp.make_kernel( - [ - "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -963,7 +943,7 @@ def get_kernel(self): """.format(i=i) for i in range(ncoeffs_tgt)] + [""" end """], - [ + kernel_data=[ lp.GlobalArg("target_boxes", None, shape=lp.auto, offset=lp.auto), lp.GlobalArg("centers", None, shape="dim, naligned_boxes"), @@ -976,12 +956,14 @@ def get_kernel(self): shape=("ntgt_level_boxes", ncoeffs_tgt), offset=lp.auto), lp.GlobalArg("src_expansions", None, shape=("nsrc_level_boxes", ncoeffs_src), offset=lp.auto), - "..." + ... ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), - name=self.name, assumptions="ntgt_boxes>=1", + name=self.name, silenced_warnings="write_race(write_expn*)", - fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nchildren=2**self.dim) for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -992,7 +974,7 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -1001,7 +983,6 @@ def __call__(self, queue, **kwargs): :arg tgt_rscale: :arg centers: """ - knl = self.get_cached_optimized_kernel() centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type @@ -1009,10 +990,11 @@ def __call__(self, queue, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) - return knl(queue, - centers=centers, - src_rscale=src_rscale, tgt_rscale=tgt_rscale, - **kwargs) + return actx.call_loopy( + self.get_cached_optimized_kernel(), + 
centers=centers, + src_rscale=src_rscale, tgt_rscale=tgt_rscale, + **kwargs) # }}} From 57971a526805ab757dda59d89f05a594b860321a Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:18:47 +0300 Subject: [PATCH 08/59] port tools and toys to arraycontext --- sumpy/tools.py | 136 +++++++++++++------------- sumpy/toys.py | 256 ++++++++++++++++++++++++++----------------------- 2 files changed, 201 insertions(+), 191 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 67deb911c..a97a31d74 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -36,25 +36,24 @@ .. autoclass:: DifferentiatedExprDerivativeTaker """ -from pytools import memoize_method -from pytools.tag import Tag, tag_dataclass -import numbers -import warnings import os import sys import enum -import platform -from collections import defaultdict, namedtuple -from pymbolic.mapper import WalkMapper -import pymbolic +import numbers +import warnings +from dataclasses import dataclass +from typing import Any, Dict, List, Tuple import numpy as np -import sumpy.symbolic as sym -import pyopencl as cl -import pyopencl.array as cla import loopy as lp -from typing import Dict, Tuple, Any +from pytools import memoize_method +from pytools.tag import Tag, tag_dataclass +from pymbolic.mapper import WalkMapper +from arraycontext import Array + +import sumpy.symbolic as sym +from sumpy.array_context import PyOpenCLArrayContext import logging logger = logging.getLogger(__name__) @@ -452,7 +451,9 @@ def diff_derivative_coeff_dict(derivative_coeff_dict: DerivativeCoeffDict, *derivative_coeff_dict* using the variable given by **variable_idx** and return a new derivative transformation dictionary. 
""" + from collections import defaultdict new_derivative_coeff_dict = defaultdict(lambda: 0) + for mi, coeff in derivative_coeff_dict.items(): # In the case where we have x * u.diff(x), the result should # be x.diff(x) + x * u.diff(x, x) @@ -511,31 +512,6 @@ def build_matrix(op, dtype=None, shape=None): return mat -def vector_to_device(queue, vec): - from pytools.obj_array import obj_array_vectorize - - from pyopencl.array import to_device - - def to_dev(ary): - return to_device(queue, ary) - - return obj_array_vectorize(to_dev, vec) - - -def vector_from_device(queue, vec): - from pytools.obj_array import obj_array_vectorize - - def from_dev(ary): - from numbers import Number - if isinstance(ary, (np.number, Number)): - # zero, most likely - return ary - - return ary.get(queue=queue) - - return obj_array_vectorize(from_dev, vec) - - def _merge_kernel_arguments(dictionary, arg): # Check for strict equality until there's a usecase if dictionary.setdefault(arg.name, arg) != arg: @@ -716,7 +692,7 @@ def __eq__(self, other): # }}} -class KernelCacheWrapper: +class KernelCacheMixin: @memoize_method def get_cached_optimized_kernel(self, **kwargs): from sumpy import code_cache, CACHING_ENABLED, OPT_ENABLED @@ -763,6 +739,9 @@ def _allow_redundant_execution_of_knl_scaling(knl): knl, within=ObjTagged(ScalingAssignmentTag())) +KernelCacheWrapper = KernelCacheMixin + + def is_obj_array_like(ary): return ( isinstance(ary, (tuple, list)) @@ -934,10 +913,14 @@ def to_complex_dtype(dtype): raise RuntimeError(f"Unknown dtype: {dtype}") -ProfileGetter = namedtuple("ProfileGetter", "start, end") +@dataclass(frozen=True) +class ProfileGetter: + start: int + end: int def get_native_event(evt): + import pyopencl as cl return evt if isinstance(evt, cl.Event) else evt.native_event @@ -991,9 +974,10 @@ def loopy_fft(shape, inverse, complex_dtype, index_dtype=None, N1, m = find_factors(m) # noqa: N806 factors.append(N1) + import pymbolic as prim nfft = n - broadcast_dims = 
tuple(pymbolic.var(f"j{d}") for d in range(len(shape) - 1)) + broadcast_dims = tuple(prim.var(f"j{d}") for d in range(len(shape) - 1)) domains = [ "{[i]: 0<=i FFTBackend: +def _get_fft_backend(actx: PyOpenCLArrayContext) -> FFTBackend: env_val = os.environ.get("SUMPY_FFT_BACKEND", None) if env_val: if env_val not in ["loopy", "pyvkfft"]: @@ -1157,11 +1142,15 @@ def _get_fft_backend(queue) -> FFTBackend: warnings.warn("VkFFT not found. FFT runs will be slower.") return FFTBackend.loopy + import pyopencl as cl + queue = actx.queue + if queue.properties & cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE: warnings.warn("VkFFT does not support out of order queues yet. " "Falling back to slower implementation.") return FFTBackend.loopy + import platform if (sys.platform == "darwin" and platform.machine() == "x86_64" and queue.context.devices[0].platform.name @@ -1174,26 +1163,34 @@ def _get_fft_backend(queue) -> FFTBackend: return FFTBackend.pyvkfft -def get_opencl_fft_app(queue, shape, dtype, inverse): +def get_opencl_fft_app( + actx: PyOpenCLArrayContext, + shape: Tuple[int, ...], dtype: "np.dtype", *, + inverse: bool) -> Any: """Setup an object for out-of-place FFT on with given shape and dtype on given queue. 
""" assert dtype.type in (np.float32, np.float64, np.complex64, np.complex128) - backend = _get_fft_backend(queue) + backend = _get_fft_backend(actx) if backend == FFTBackend.loopy: return loopy_fft(shape, inverse=inverse, complex_dtype=dtype.type), backend elif backend == FFTBackend.pyvkfft: from pyvkfft.opencl import VkFFTApp - app = VkFFTApp(shape=shape, dtype=dtype, queue=queue, ndim=1, inplace=False) + app = VkFFTApp( + shape=shape, dtype=dtype, + queue=actx.queue, ndim=1, inplace=False) return app, backend else: raise RuntimeError(f"Unsupported FFT backend {backend}") -def run_opencl_fft(fft_app, queue, input_vec, inverse=False, wait_for=None): +def run_opencl_fft(actx: PyOpenCLArrayContext, + fft_app: Tuple[Any, FFTBackend], input_vec: Array, *, + inverse: bool = False, + wait_for: List[Any] = None): """Runs an FFT on input_vec and returns a :class:`MarkerBasedProfilingEvent` that indicate the end and start of the operations carried out and the output vector. @@ -1202,18 +1199,19 @@ def run_opencl_fft(fft_app, queue, input_vec, inverse=False, wait_for=None): app, backend = fft_app if backend == FFTBackend.loopy: - evt, (output_vec,) = app(queue, y=input_vec, wait_for=wait_for) + evt, (output_vec,) = app(actx.queue, y=input_vec, wait_for=wait_for) return (evt, output_vec) elif backend == FFTBackend.pyvkfft: if wait_for is None: wait_for = [] - start_evt = cl.enqueue_marker(queue, wait_for=wait_for[:]) + import pyopencl as cl + start_evt = cl.enqueue_marker(actx.queue, wait_for=wait_for[:]) if app.inplace: raise RuntimeError("inplace fft is not supported") else: - output_vec = cla.empty_like(input_vec, queue) + output_vec = actx.np.empty_like(input_vec) # FIXME: use the public API once # https://github.com/vincefn/pyvkfft/pull/17 is in @@ -1224,9 +1222,9 @@ def run_opencl_fft(fft_app, queue, input_vec, inverse=False, wait_for=None): meth = _vkfft_opencl.fft meth(app.app, int(input_vec.data.int_ptr), - int(output_vec.data.int_ptr), int(queue.int_ptr)) + 
int(output_vec.data.int_ptr), int(actx.queue.int_ptr)) - end_evt = cl.enqueue_marker(queue, wait_for=[start_evt]) + end_evt = cl.enqueue_marker(actx.queue, wait_for=[start_evt]) output_vec.add_event(end_evt) return (MarkerBasedProfilingEvent(end_event=end_evt, start_event=start_evt), diff --git a/sumpy/toys.py b/sumpy/toys.py index ded4a54e2..959394fe0 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -23,14 +23,14 @@ THE SOFTWARE. """ -from pytools import memoize_method -from numbers import Number from functools import partial -from sumpy.kernel import TargetTransformationRemover +from typing import Any, Optional + +import numpy as np -import numpy as np # noqa: F401 -import loopy as lp # noqa: F401 -import pyopencl as cl +from pytools import memoize_method +from sumpy.kernel import TargetTransformationRemover +from sumpy.array_context import PyOpenCLArrayContext import logging logger = logging.getLogger(__name__) @@ -85,34 +85,33 @@ class ToyContext: .. automethod:: __init__ """ - def __init__(self, cl_context, kernel, + def __init__(self, kernel, mpole_expn_class=None, local_expn_class=None, expansion_factory=None, extra_source_kwargs=None, extra_kernel_kwargs=None): - self.cl_context = cl_context - self.queue = cl.CommandQueue(self.cl_context) self.kernel = kernel - self.no_target_deriv_kernel = TargetTransformationRemover()(kernel) if expansion_factory is None: from sumpy.expansion import DefaultExpansionFactory expansion_factory = DefaultExpansionFactory() + if mpole_expn_class is None: - mpole_expn_class = \ - expansion_factory.get_multipole_expansion_class(kernel) + mpole_expn_class = ( + expansion_factory.get_multipole_expansion_class(kernel)) + if local_expn_class is None: from sumpy.expansion.m2l import NonFFTM2LTranslationClassFactory - local_expn_class = \ - expansion_factory.get_local_expansion_class(kernel) + local_expn_class = ( + expansion_factory.get_local_expansion_class(kernel)) m2l_translation_class_factory = NonFFTM2LTranslationClassFactory() 
- m2l_translation_class = \ - m2l_translation_class_factory.get_m2l_translation_class( - kernel, local_expn_class) - local_expn_class = partial(local_expn_class, - m2l_translation=m2l_translation_class()) + m2l_translation_class = ( + m2l_translation_class_factory.get_m2l_translation_class( + kernel, local_expn_class)) + local_expn_class = ( + partial(local_expn_class, m2l_translation=m2l_translation_class())) self.mpole_expn_class = mpole_expn_class self.local_expn_class = local_expn_class @@ -132,107 +131,110 @@ def __init__(self, cl_context, kernel, @memoize_method def get_p2p(self): from sumpy.p2p import P2P - return P2P(self.cl_context, (self.kernel,), exclude_self=False) + return P2P((self.kernel,), exclude_self=False) @memoize_method def get_p2m(self, order): from sumpy import P2EFromSingleBox - return P2EFromSingleBox(self.cl_context, - self.mpole_expn_class(self.no_target_deriv_kernel, order), - kernels=(self.kernel,)) + return P2EFromSingleBox( + self.mpole_expn_class(self.no_target_deriv_kernel, order), + kernels=(self.kernel,)) @memoize_method def get_p2l(self, order): from sumpy import P2EFromSingleBox - return P2EFromSingleBox(self.cl_context, - self.local_expn_class(self.no_target_deriv_kernel, order), - kernels=(self.kernel,)) + return P2EFromSingleBox( + self.local_expn_class(self.no_target_deriv_kernel, order), + kernels=(self.kernel,)) @memoize_method def get_m2p(self, order): from sumpy import E2PFromSingleBox - return E2PFromSingleBox(self.cl_context, - self.mpole_expn_class(self.no_target_deriv_kernel, order), - (self.kernel,)) + return E2PFromSingleBox( + self.mpole_expn_class(self.no_target_deriv_kernel, order), + (self.kernel,)) @memoize_method def get_l2p(self, order): from sumpy import E2PFromSingleBox - return E2PFromSingleBox(self.cl_context, - self.local_expn_class(self.no_target_deriv_kernel, order), - (self.kernel,)) + return E2PFromSingleBox( + self.local_expn_class(self.no_target_deriv_kernel, order), + (self.kernel,)) 
@memoize_method def get_m2m(self, from_order, to_order): from sumpy import E2EFromCSR - return E2EFromCSR(self.cl_context, - self.mpole_expn_class(self.no_target_deriv_kernel, from_order), - self.mpole_expn_class(self.no_target_deriv_kernel, to_order)) + return E2EFromCSR( + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.mpole_expn_class(self.no_target_deriv_kernel, to_order)) @memoize_method def get_m2l(self, from_order, to_order): from sumpy import E2EFromCSR - return E2EFromCSR(self.cl_context, - self.mpole_expn_class(self.no_target_deriv_kernel, from_order), - self.local_expn_class(self.no_target_deriv_kernel, to_order)) + return E2EFromCSR( + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) @memoize_method def get_l2l(self, from_order, to_order): from sumpy import E2EFromCSR - return E2EFromCSR(self.cl_context, - self.local_expn_class(self.no_target_deriv_kernel, from_order), - self.local_expn_class(self.no_target_deriv_kernel, to_order)) + return E2EFromCSR( + self.local_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) # }}} # {{{ helpers -def _p2e(psource, center, rscale, order, p2e, expn_class, expn_kwargs): - source_boxes = np.array([0], dtype=np.int32) - box_source_starts = np.array([0], dtype=np.int32) - box_source_counts_nonchild = np.array( - [psource.points.shape[-1]], dtype=np.int32) +def _p2e(actx: PyOpenCLArrayContext, + psource, center, rscale, order, p2e, expn_class, expn_kwargs): + source_boxes = actx.zeros(1, dtype=np.int32) + box_source_starts = actx.zeros(1, dtype=np.int32) + box_source_counts_nonchild = actx.from_numpy( + np.array([psource.points.shape[-1]], dtype=np.int32) + ) toy_ctx = psource.toy_ctx - center = np.asarray(center) - centers = np.array(center, dtype=np.float64).reshape( - toy_ctx.kernel.dim, 1) + centers = actx.from_numpy( + np.asarray(center, 
dtype=np.float64).reshape(-1, 1) + ) - evt, (coeffs,) = p2e(toy_ctx.queue, + coeffs = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, centers=centers, - sources=psource.points, - strengths=(psource.weights,), + sources=actx.from_numpy(psource.points), + strengths=(actx.from_numpy(psource.weights),), rscale=rscale, nboxes=1, tgt_base_ibox=0, - #flags="print_hl_cl", - out_host=True, - **toy_ctx.extra_source_and_kernel_kwargs) + **toy_ctx.extra_source_and_kernel_kwargs)["tgt_expansions"] - return expn_class(toy_ctx, center, rscale, order, coeffs[0], - derived_from=psource, **expn_kwargs) + return expn_class( + toy_ctx, center, rscale, order, actx.to_numpy(coeffs[0]), + derived_from=psource, **expn_kwargs) -def _e2p(psource, targets, e2p): +def _e2p(actx: PyOpenCLArrayContext, psource, targets, e2p): ntargets = targets.shape[-1] - boxes = np.array([0], dtype=np.int32) - - box_target_starts = np.array([0], dtype=np.int32) - box_target_counts_nonchild = np.array([ntargets], dtype=np.int32) + boxes = actx.zeros(1, dtype=np.int32) + box_target_starts = actx.zeros(1, dtype=np.int32) + box_target_counts_nonchild = actx.from_numpy( + np.array([ntargets], dtype=np.int32)) toy_ctx = psource.toy_ctx - centers = np.array(psource.center, dtype=np.float64).reshape( - toy_ctx.kernel.dim, 1) - - coeffs = np.array([psource.coeffs]) - evt, (pot,) = e2p( - toy_ctx.queue, + centers = actx.from_numpy( + np.asarray(psource.center, dtype=np.float64).reshape(-1, 1) + ) + coeffs = actx.from_numpy(np.array([psource.coeffs])) + + from pytools.obj_array import make_obj_array + pot = e2p( + actx, src_expansions=coeffs, src_base_ibox=0, target_boxes=boxes, @@ -240,34 +242,32 @@ def _e2p(psource, targets, e2p): box_target_counts_nonchild=box_target_counts_nonchild, centers=centers, rscale=psource.rscale, - targets=targets, - #flags="print_hl_cl", - out_host=True, **toy_ctx.extra_kernel_kwargs) + 
targets=actx.from_numpy(make_obj_array(targets)), - return pot + **toy_ctx.extra_kernel_kwargs)["result_s0"] + return actx.to_numpy(pot) -def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs): - toy_ctx = psource.toy_ctx - target_boxes = np.array([1], dtype=np.int32) - src_box_starts = np.array([0, 1], dtype=np.int32) - src_box_lists = np.array([0], dtype=np.int32) - - centers = (np.array( - [ - # box 0: source - psource.center, - - # box 1: target - to_center, - ], - dtype=np.float64)).T.copy() - - coeffs = np.array([psource.coeffs]) +def _e2e(actx: PyOpenCLArrayContext, + psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs): + toy_ctx = psource.toy_ctx - evt, (to_coeffs,) = e2e( - toy_ctx.queue, + target_boxes = actx.from_numpy(np.array([1], dtype=np.int32)) + src_box_starts = actx.from_numpy(np.array([0, 1], dtype=np.int32)) + src_box_lists = actx.zeros(1, dtype=np.int32) + + centers = np.array([ + # box 0: source + psource.center, + # box 1: target + to_center, + ], dtype=np.float64) + centers = actx.from_numpy(centers.T.copy()) + coeffs = actx.from_numpy(np.array([psource.coeffs])) + + to_coeffs = e2e( + actx, src_expansions=coeffs, src_base_ibox=0, tgt_base_ibox=0, @@ -282,11 +282,11 @@ def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs): src_rscale=psource.rscale, tgt_rscale=to_rscale, - #flags="print_hl_cl", - out_host=True, **toy_ctx.extra_kernel_kwargs) + **toy_ctx.extra_kernel_kwargs)["tgt_expansions"] - return expn_class(toy_ctx, to_center, to_rscale, to_order, to_coeffs[1], - derived_from=psource, **expn_kwargs) + return expn_class( + toy_ctx, to_center, to_rscale, to_order, actx.to_numpy(to_coeffs[1]), + derived_from=psource, **expn_kwargs) # }}} @@ -313,13 +313,14 @@ class PotentialSource: def __init__(self, toy_ctx): self.toy_ctx = toy_ctx - def eval(self, targets): + def eval(self, actx: PyOpenCLArrayContext, targets): raise NotImplementedError() def __neg__(self): return 
-1*self def __add__(self, other): + from numbers import Number if isinstance(other, (Number, np.number)): other = ConstantPotential(self.toy_ctx, other) elif not isinstance(other, PotentialSource): @@ -336,6 +337,7 @@ def __rsub__(self, other): return (-self).__add__(other) def __mul__(self, other): + from numbers import Number if isinstance(other, (Number, np.number)): other = ConstantPotential(self.toy_ctx, other) elif not isinstance(other, PotentialSource): @@ -407,13 +409,15 @@ def __init__(self, toy_ctx, points, weights, center=None): self.weights = weights self._center = center - def eval(self, targets): - evt, (potential,) = self.toy_ctx.get_p2p()( - self.toy_ctx.queue, targets, self.points, [self.weights], - out_host=True, + def eval(self, actx: PyOpenCLArrayContext, targets): + potential = self.toy_ctx.get_p2p()( + actx, + actx.from_numpy(targets), + actx.from_numpy(self.points), + [actx.from_numpy(self.weights)], **self.toy_ctx.extra_source_and_kernel_kwargs) - return potential + return actx.to_numpy(potential["result_s0"]) @property def center(self): @@ -455,13 +459,13 @@ def with_coeffs(self, coeffs): class MultipoleExpansion(ExpansionPotentialSource): - def eval(self, targets): - return _e2p(self, targets, self.toy_ctx.get_m2p(self.order)) + def eval(self, actx: PyOpenCLArrayContext, targets): + return _e2p(actx, self, targets, self.toy_ctx.get_m2p(self.order)) class LocalExpansion(ExpansionPotentialSource): - def eval(self, targets): - return _e2p(self, targets, self.toy_ctx.get_l2p(self.order)) + def eval(self, actx: PyOpenCLArrayContext, targets): + return _e2p(actx, self, targets, self.toy_ctx.get_l2p(self.order)) class PotentialExpressionNode(PotentialSource): @@ -484,38 +488,42 @@ def center(self): class Sum(PotentialExpressionNode): - def eval(self, targets): + def eval(self, actx: PyOpenCLArrayContext, targets): result = 0 for psource in self.psources: - result = result + psource.eval(targets) + result = result + psource.eval(actx, targets) 
return result class Product(PotentialExpressionNode): - def eval(self, targets): + def eval(self, actx: PyOpenCLArrayContext, targets): result = 1 for psource in self.psources: - result = result * psource.eval(targets) + result = result * psource.eval(actx, targets) return result # }}} -def multipole_expand(psource, center, order=None, rscale=1, **expn_kwargs): +def multipole_expand( + actx: PyOpenCLArrayContext, psource: PotentialSource, center, *, + order: Optional[int] = None, rscale: float = 1, + **expn_kwargs: Any): if isinstance(psource, PointSources): if order is None: raise ValueError("order may not be None") - return _p2e(psource, center, rscale, order, psource.toy_ctx.get_p2m(order), - MultipoleExpansion, expn_kwargs) + return _p2e(actx, + psource, center, rscale, order, psource.toy_ctx.get_p2m(order), + MultipoleExpansion, expn_kwargs) elif isinstance(psource, MultipoleExpansion): if order is None: order = psource.order - return _e2e(psource, center, rscale, order, + return _e2e(actx, psource, center, rscale, order, psource.toy_ctx.get_m2m(psource.order, order), MultipoleExpansion, expn_kwargs) @@ -523,29 +531,33 @@ def multipole_expand(psource, center, order=None, rscale=1, **expn_kwargs): raise TypeError(f"do not know how to expand '{type(psource).__name__}'") -def local_expand(psource, center, order=None, rscale=1, **expn_kwargs): +def local_expand( + actx: PyOpenCLArrayContext, psource: PotentialSource, center, *, + order: Optional[int] = None, rscale: float = 1, + **expn_kwargs: Any): if isinstance(psource, PointSources): if order is None: raise ValueError("order may not be None") - return _p2e(psource, center, rscale, order, psource.toy_ctx.get_p2l(order), - LocalExpansion, expn_kwargs) + return _p2e(actx, + psource, center, rscale, order, psource.toy_ctx.get_p2l(order), + LocalExpansion, expn_kwargs) elif isinstance(psource, MultipoleExpansion): if order is None: order = psource.order - return _e2e(psource, center, rscale, order, - 
psource.toy_ctx.get_m2l(psource.order, order), - LocalExpansion, expn_kwargs) + return _e2e(actx, psource, center, rscale, order, + psource.toy_ctx.get_m2l(psource.order, order), + LocalExpansion, expn_kwargs) elif isinstance(psource, LocalExpansion): if order is None: order = psource.order - return _e2e(psource, center, rscale, order, - psource.toy_ctx.get_l2l(psource.order, order), - LocalExpansion, expn_kwargs) + return _e2e(actx, psource, center, rscale, order, + psource.toy_ctx.get_l2l(psource.order, order), + LocalExpansion, expn_kwargs) else: raise TypeError(f"do not know how to expand '{type(psource).__name__}'") From 1abff0770c8075b65b8377713ba5b71b3b3fafe9 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:18:58 +0300 Subject: [PATCH 09/59] port test_tools to arraycontext --- test/test_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_tools.py b/test/test_tools.py index 6cc38b5d4..035607aba 100644 --- a/test/test_tools.py +++ b/test/test_tools.py @@ -90,7 +90,7 @@ def test_fft(actx_factory, size): out = fft(inp) fft_func = loopy_fft(inp.shape, inverse=False, complex_dtype=inp.dtype.type) - evt, (out_dev,) = fft_func(actx, y=inp_dev) + out_dev = actx.call_loopy(fft_func, y=inp_dev)["x"] assert np.allclose(actx.to_numpy(out_dev), out) From 5d95944ef682238549cb9e2aa3a8694ac3ac70cb Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 3 Sep 2022 21:19:04 +0300 Subject: [PATCH 10/59] port test_misc to arraycontext --- test/test_misc.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/test_misc.py b/test/test_misc.py index 0e1cd66e2..237b37ca8 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -112,7 +112,7 @@ def test_pde_check_kernels(actx_factory, knl_info, order=5): actx = actx_factory() dim = knl_info.kernel.dim - tctx = t.ToyContext(actx, knl_info.kernel, + tctx = t.ToyContext(knl_info.kernel, extra_source_kwargs=knl_info.extra_kwargs) rng = 
np.random.default_rng(42) @@ -127,7 +127,7 @@ def test_pde_check_kernels(actx_factory, knl_info, order=5): for h in [0.1, 0.05, 0.025]: cp = CalculusPatch(np.array([1, 0, 0])[:dim], h=h, order=order) - pot = pt_src.eval(cp.points) + pot = pt_src.eval(actx, cp.points) pde = knl_info.pde_func(cp, pot) @@ -297,19 +297,19 @@ def test_toy_p2e2e2p(actx_factory, case): from sumpy.expansion import VolumeTaylorExpansionFactory actx = actx_factory() - ctx = t.ToyContext(actx, + ctx = t.ToyContext( LaplaceKernel(dim), expansion_factory=VolumeTaylorExpansionFactory()) errors = [] src_pot = t.PointSources(ctx, src, weights=np.array([1.])) - pot_actual = src_pot.eval(tgt).item() + pot_actual = src_pot.eval(actx, tgt).item() for order in ORDERS_P2E2E2P: - expn = case.expansion1(src_pot, case.center1, order=order) - expn2 = case.expansion2(expn, case.center2, order=order) - pot_p2e2e2p = expn2.eval(tgt).item() + expn = case.expansion1(actx, src_pot, case.center1, order=order) + expn2 = case.expansion2(actx, expn, case.center2, order=order) + pot_p2e2e2p = expn2.eval(actx, tgt).item() errors.append(np.abs(pot_actual - pot_p2e2e2p)) conv_factor = approx_convergence_factor(1 + np.array(ORDERS_P2E2E2P), errors) From 9b4fbdeafb52bcb820652c8062ec8cbcc78d08e7 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sun, 4 Sep 2022 20:53:31 +0300 Subject: [PATCH 11/59] bump requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ff9ca871b..64de618f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ git+https://github.com/inducer/pytools.git#egg=pytools git+https://github.com/inducer/pymbolic.git#egg=pymbolic git+https://github.com/inducer/islpy.git#egg=islpy git+https://github.com/inducer/pyopencl.git#egg=pyopencl -git+https://github.com/inducer/boxtree.git#egg=boxtree +git+https://github.com/alexfikl/boxtree.git@towards-array-context#egg=boxtree 
git+https://github.com/inducer/loopy.git#egg=loopy git+https://github.com/inducer/arraycontext.git#egg=arraycontext git+https://github.com/inducer/pyfmmlib.git#egg=pyfmmlib From 71e6c65cb1a5034c735b9e7111a98bb03cdece2a Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sun, 4 Sep 2022 21:25:24 +0300 Subject: [PATCH 12/59] start porting test_kernels to arraycontext --- sumpy/__init__.py | 2 +- sumpy/tools.py | 3 +-- test/test_kernels.py | 52 +++++++++++++++++++++++--------------------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/sumpy/__init__.py b/sumpy/__init__.py index b39ead2ec..2cbc40a61 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -41,7 +41,7 @@ "M2LPreprocessMultipole", "M2LPostprocessLocal"] -code_cache = WriteOncePersistentDict("sumpy-code-cache-v6-"+VERSION_TEXT) +code_cache = WriteOncePersistentDict(f"sumpy-code-cache-v6-{VERSION_TEXT}") # {{{ optimization control diff --git a/sumpy/tools.py b/sumpy/tools.py index a97a31d74..61792c1d3 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -709,8 +709,7 @@ def get_cached_optimized_kernel(self, **kwargs): try: result = code_cache[cache_key] - logger.debug("{}: kernel cache hit [key={}]".format( - self.name, cache_key)) + logger.debug("%s: kernel cache hit [key=%s]", self.name, cache_key) return result except KeyError: pass diff --git a/test/test_kernels.py b/test/test_kernels.py index fdd65b865..085be01d1 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -69,9 +69,7 @@ def test_p2p(actx_factory, exclude_self): from sumpy.p2p import P2P lknl = LaplaceKernel(dimensions) - knl = P2P(actx, - [lknl, AxisTargetDerivative(0, lknl)], - exclude_self=exclude_self) + knl = P2P([lknl, AxisTargetDerivative(0, lknl)], exclude_self=exclude_self) rng = np.random.default_rng(42) targets = rng.random(size=(dimensions, n)) @@ -82,12 +80,17 @@ def test_p2p(actx_factory, exclude_self): extra_kwargs = {} if exclude_self: - extra_kwargs["target_to_source"] = np.arange(n, 
dtype=np.int32) - - evt, (potential, x_derivative) = knl( - actx, targets, sources, [strengths], - out_host=True, **extra_kwargs) + extra_kwargs["target_to_source"] = ( + actx.from_numpy(np.arange(n, dtype=np.int32))) + + result = knl( + actx, + actx.from_numpy(targets), + actx.from_numpy(sources), + [actx.from_numpy(strengths)], + **extra_kwargs) + potential = actx.to_numpy(result["result_s0"]) potential_ref = np.empty_like(potential) targets = targets.T @@ -151,8 +154,8 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): + center[:, np.newaxis]) strengths = [ - np.ones(nsources, dtype=np.float64) * (1/nsources), - np.ones(nsources, dtype=np.float64) * (2/nsources) + actx.from_numpy(np.full(nsources, 1 / nsources, dtype=np.float64)), + actx.from_numpy(np.full(nsources, 2 / nsources, dtype=np.float64)) ] source_boxes = np.array([0], dtype=np.int32) @@ -174,7 +177,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): rscale = 0.5 # pick something non-1 # apply p2e at the same time - p2e = P2EFromSingleBox(actx, expn, + p2e = P2EFromSingleBox(expn, kernels=source_kernels, strength_usage=[0, 1]) @@ -202,7 +205,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): if isinstance(source_kernel, DirectionalSourceDerivative): extra_source_kwargs["dir_vec"] = dir_vec - p2e = P2EFromSingleBox(actx, expn, + p2e = P2EFromSingleBox(expn, kernels=[source_kernel], strength_usage=[i]) evt, (mpoles,) = p2e(actx, @@ -286,9 +289,9 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative expn = expn_class(knl, order=order) from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P - p2e = P2EFromSingleBox(actx, expn, kernels=[knl]) - e2p = E2PFromSingleBox(actx, expn, kernels=target_kernels) - p2p = P2P(actx, target_kernels, exclude_self=False) + p2e = P2EFromSingleBox(expn, kernels=[knl]) + e2p = E2PFromSingleBox(expn, kernels=target_kernels) + p2p = P2P(target_kernels, exclude_self=False) from pytools.convergence import 
EOCRecorder eoc_rec_pot = EOCRecorder() @@ -576,13 +579,13 @@ def eval_at(e2p, source_box_nr, rscale): l_expn = local_expn_class(knl, order=order, m2l_translation=m2l_translation) from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR - p2m = P2EFromSingleBox(actx, m_expn) - m2m = E2EFromCSR(actx, m_expn, m_expn) - m2p = E2PFromSingleBox(actx, m_expn, target_kernels) - m2l = E2EFromCSR(actx, m_expn, l_expn) - l2l = E2EFromCSR(actx, l_expn, l_expn) - l2p = E2PFromSingleBox(actx, l_expn, target_kernels) - p2p = P2P(actx, target_kernels, exclude_self=False) + p2m = P2EFromSingleBox(m_expn) + m2m = E2EFromCSR(m_expn, m_expn) + m2p = E2PFromSingleBox(m_expn, target_kernels) + m2l = E2EFromCSR(m_expn, l_expn) + l2l = E2EFromCSR(l_expn, l_expn) + l2p = E2PFromSingleBox(l_expn, target_kernels) + p2p = P2P(target_kernels, exclude_self=False) fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res) targets = fp.points @@ -917,7 +920,6 @@ def test_m2m_compressed_error_helmholtz(actx_factory, dim, order): for i, (mpole_expn_class, local_expn_class) in \ enumerate(zip(mpole_expn_classes, local_expn_classes)): tctx = toys.ToyContext( - actx, knl, extra_kernel_kwargs=extra_kernel_kwargs, local_expn_class=local_expn_class, @@ -929,11 +931,11 @@ def test_m2m_compressed_error_helmholtz(actx_factory, dim, order): np.ones(sources.shape[-1]) ) - mexp = toys.multipole_expand(pt_src, + mexp = toys.multipole_expand(actx, pt_src, center=mpole_center.reshape(dim), order=order, rscale=h) - mexp2 = toys.multipole_expand(mexp, + mexp2 = toys.multipole_expand(actx, mexp, center=second_center.reshape(dim), order=order, rscale=h) From 7086997f97a0bfb1395b19ec6543fb10e7c25e83 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 09:09:37 +0300 Subject: [PATCH 13/59] more porting in test_kernels --- test/test_kernels.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/test/test_kernels.py b/test/test_kernels.py index 
085be01d1..894dcac42 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -149,7 +149,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): rng = np.random.default_rng(14) center = np.array([2, 1, 0][:knl.dim], np.float64) - sources = ( + sources = actx.from_numpy( 0.7 * (-0.5 + rng.random(size=(knl.dim, nsources), dtype=np.float64)) + center[:, np.newaxis]) @@ -158,12 +158,14 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): actx.from_numpy(np.full(nsources, 2 / nsources, dtype=np.float64)) ] - source_boxes = np.array([0], dtype=np.int32) - box_source_starts = np.array([0], dtype=np.int32) - box_source_counts_nonchild = np.array([nsources], dtype=np.int32) + source_boxes = actx.zeros(1, dtype=np.int32) + box_source_starts = actx.zeros(1, dtype=np.int32) + box_source_counts_nonchild = ( + actx.from_numpy(np.array([nsources], dtype=np.int32))) alpha = np.linspace(0, 2*np.pi, nsources, np.float64) - dir_vec = np.vstack([np.cos(alpha), np.sin(alpha)]) + dir_vec = actx.from_numpy( + np.vstack([np.cos(alpha), np.sin(alpha)])) from sumpy.expansion.local import LocalExpansionBase if issubclass(expn_class, LocalExpansionBase): @@ -174,6 +176,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): dtype=np.float64).reshape(knl.dim, 1) + center[:, np.newaxis]) + centers = actx.from_numpy(centers) rscale = 0.5 # pick something non-1 # apply p2e at the same time @@ -181,7 +184,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): kernels=source_kernels, strength_usage=[0, 1]) - evt, (mpoles,) = p2e(actx, + mpoles = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, @@ -191,12 +194,9 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): nboxes=1, tgt_base_ibox=0, rscale=rscale, - - out_host=True, dir_vec=dir_vec, - **extra_kwargs) - - actual_result = mpoles + **extra_kwargs)["tgt_expansions"] + actual_result = actx.to_numpy(mpoles) # apply p2e 
separately expected_result = np.zeros_like(actual_result) @@ -208,7 +208,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): p2e = P2EFromSingleBox(expn, kernels=[source_kernel], strength_usage=[i]) - evt, (mpoles,) = p2e(actx, + mpoles = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, @@ -218,11 +218,10 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): nboxes=1, tgt_base_ibox=0, rscale=rscale, + **extra_source_kwargs)["tgt_expansions"] + expected_result += actx.to_numpy(mpoles) - out_host=True, **extra_source_kwargs) - expected_result += mpoles - - norm = la.norm(actual_result - expected_result)/la.norm(expected_result) + norm = la.norm(actual_result - expected_result) / la.norm(expected_result) assert norm < 1e-12 # }}} From 45230202491703dcc11c03c4ef1c176bba548be3 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 09:12:11 +0300 Subject: [PATCH 14/59] add arraycontext to docs --- doc/conf.py | 1 + doc/misc.rst | 2 ++ sumpy/array_context.py | 6 +++++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 5dcae4e44..b69774dd9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -24,6 +24,7 @@ "https://documen.tician.de/loopy/": None, "https://documen.tician.de/pytential/": None, "https://documen.tician.de/boxtree/": None, + "https://documen.tician.de/arraycontext/": None, "https://docs.sympy.org/latest/": None, "https://matplotlib.org/stable/": None, } diff --git a/doc/misc.rst b/doc/misc.rst index 10b5042d5..2357f5b58 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -1,6 +1,8 @@ Misc Tools ========== +.. automodule:: sumpy.array_context + .. automodule:: sumpy.tools .. 
automodule:: sumpy.symbolic diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 6463e1a79..8723ed159 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -26,6 +26,10 @@ register_pytest_array_context_factory) __doc__ = """ +Array Context +============= + +.. autofunction:: make_loopy_program .. autoclass:: PyOpenCLArrayContext """ @@ -40,7 +44,7 @@ def make_loopy_program( index_dtype=None, tags=None): """Return a :class:`loopy.LoopKernel` suitable for use with - :meth:`ArrayContext.call_loopy`. + :meth:`arraycontext.ArrayContext.call_loopy`. """ if kernel_data is None: kernel_data = [...] From aa5a50b3cf6322fba134b87f9cc2ff1427317927 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 18:47:21 +0300 Subject: [PATCH 15/59] add some annotations to make_loopy_program --- sumpy/array_context.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 8723ed159..92b07daa3 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -20,10 +20,15 @@ THE SOFTWARE. """ +from typing import Any, List, Optional, Union + +import numpy as np + from boxtree.array_context import PyOpenCLArrayContext as PyOpenCLArrayContextBase from arraycontext.pytest import ( _PytestPyOpenCLArrayContextFactoryWithClass, register_pytest_array_context_factory) +from pytools.tag import ToTagSetConvertible __doc__ = """ Array Context @@ -38,11 +43,11 @@ def make_loopy_program( domains, statements, - kernel_data=None, *, - name="sumpy_loopy_kernel", - silenced_warnings=None, - index_dtype=None, - tags=None): + kernel_data: Optional[List[Any]] = None, *, + name: str = "sumpy_loopy_kernel", + silenced_warnings: Optional[Union[List[str], str]] = None, + index_dtype: Optional["np.dtype"] = None, + tags: ToTagSetConvertible = None): """Return a :class:`loopy.LoopKernel` suitable for use with :meth:`arraycontext.ArrayContext.call_loopy`. 
""" From bfce3295e34d5888ed5fa374ab31aa9cf31b2cb2 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 19:15:37 +0300 Subject: [PATCH 16/59] finish porting in test_kernels --- test/test_kernels.py | 140 +++++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 64 deletions(-) diff --git a/test/test_kernels.py b/test/test_kernels.py index 894dcac42..4b621faea 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -31,6 +31,7 @@ from sumpy.array_context import ( # noqa: F401 PytestPyOpenCLArrayContextFactory, _acf) +from pytools.obj_array import make_obj_array import sumpy.symbolic as sym from sumpy.expansion.multipole import ( VolumeTaylorMultipoleExpansion, @@ -304,21 +305,23 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative rng = np.random.default_rng(19) center = np.array([2, 1, 0][:knl.dim], np.float64) - sources = ( + sources = actx.from_numpy( 0.7 * (-0.5 + rng.random((knl.dim, nsources), dtype=np.float64)) + center[:, np.newaxis]) - strengths = np.ones(nsources, dtype=np.float64) / nsources + strengths = actx.from_numpy( + np.full(nsources, 1 / nsources, dtype=np.float64)) - source_boxes = np.array([0], dtype=np.int32) - box_source_starts = np.array([0], dtype=np.int32) - box_source_counts_nonchild = np.array([nsources], dtype=np.int32) + source_boxes = actx.zeros(1, dtype=np.int32) + box_source_starts = actx.zeros(1, dtype=np.int32) + box_source_counts_nonchild = actx.from_numpy( + np.array([nsources], dtype=np.int32)) extra_source_kwargs = extra_kwargs.copy() if isinstance(knl, DirectionalSourceDerivative): alpha = np.linspace(0, 2*np.pi, nsources, np.float64) dir_vec = np.vstack([np.cos(alpha), np.sin(alpha)]) - extra_source_kwargs["dir_vec"] = dir_vec + extra_source_kwargs["dir_vec"] = actx.from_numpy(dir_vec) from sumpy.visualization import FieldPlotter @@ -334,13 +337,14 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative 
dtype=np.float64).reshape(knl.dim, 1) + center[:, np.newaxis]) - targets = fp.points + centers = actx.from_numpy(centers) + targets = actx.from_numpy(make_obj_array(fp.points)) rscale = 0.5 # pick something non-1 # {{{ apply p2e - evt, (mpoles,) = p2e(actx, + mpoles = p2e(actx, source_boxes=source_boxes, box_source_starts=box_source_starts, box_source_counts_nonchild=box_source_counts_nonchild, @@ -350,19 +354,19 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative nboxes=1, tgt_base_ibox=0, rscale=rscale, - - out_host=True, **extra_source_kwargs) + **extra_source_kwargs)["tgt_expansions"] # }}} # {{{ apply e2p - ntargets = targets.shape[-1] + ntargets = fp.points.shape[-1] - box_target_starts = np.array([0], dtype=np.int32) - box_target_counts_nonchild = np.array([ntargets], dtype=np.int32) + box_target_starts = actx.zeros(1, dtype=np.int32) + box_target_counts_nonchild = actx.from_numpy( + np.array([ntargets], dtype=np.int32)) - evt, (pot, grad_x, ) = e2p( + result = e2p( actx, src_expansions=mpoles, src_base_ibox=0, @@ -372,19 +376,23 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative centers=centers, targets=targets, rscale=rscale, + **extra_kwargs) - out_host=True, **extra_kwargs) + pot = actx.to_numpy(result["result_s0"]) + grad_x = actx.to_numpy(result["result_s1"]) # }}} # {{{ compute (direct) reference solution - evt, (pot_direct, grad_x_direct, ) = p2p( + result = p2p( actx, targets, sources, (strengths,), - out_host=True, **extra_source_kwargs) + pot_direct = actx.to_numpy(result["result_s0"]) + grad_x_direct = actx.to_numpy(result["result_s1"]) + err_pot = la.norm((pot - pot_direct)/res**2) err_grad_x = la.norm((grad_x - grad_x_direct)/res**2) @@ -503,10 +511,11 @@ def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class, # Just to make sure things also work away from the origin rng = np.random.default_rng(18) origin = np.array([2, 1, 0][:knl.dim], np.float64) - sources = 
( + sources = actx.from_numpy( 0.7 * (-0.5 + rng.random((knl.dim, nsources), dtype=np.float64)) + origin[:, np.newaxis]) - strengths = np.ones(nsources, dtype=np.float64) * (1/nsources) + strengths = actx.from_numpy( + np.full(nsources, 1 / nsources, dtype=np.float64)) pconv_verifier_p2m2p = PConvergenceVerifier() pconv_verifier_p2m2m2p = PConvergenceVerifier() @@ -516,7 +525,6 @@ def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class, from sumpy.visualization import FieldPlotter eval_offset = np.array([5.5, 0.0, 0][:knl.dim]) - centers = (np.array( [ # box 0: particles, first mpole here @@ -543,32 +551,28 @@ def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class, nboxes = centers.shape[-1] def eval_at(e2p, source_box_nr, rscale): - e2p_target_boxes = np.array([source_box_nr], dtype=np.int32) + e2p_target_boxes = actx.from_numpy( + np.array([source_box_nr], dtype=np.int32)) # These are indexed by global box numbers. - e2p_box_target_starts = np.array([0, 0, 0, 0], dtype=np.int32) - e2p_box_target_counts_nonchild = np.array([0, 0, 0, 0], - dtype=np.int32) + e2p_box_target_starts = actx.zeros(4, dtype=np.int32) + e2p_box_target_counts_nonchild = actx.zeros(4, dtype=np.int32) e2p_box_target_counts_nonchild[source_box_nr] = ntargets - evt, (pot,) = e2p( + pot = e2p( actx, - src_expansions=mpoles, src_base_ibox=0, - target_boxes=e2p_target_boxes, box_target_starts=e2p_box_target_starts, box_target_counts_nonchild=e2p_box_target_counts_nonchild, - centers=centers, + centers=actx.from_numpy(centers), targets=targets, - rscale=rscale, - - out_host=True, **extra_kwargs + **extra_kwargs ) - return pot + return actx.to_numpy(pot["result_s0"]) m2l_factory = NonFFTM2LTranslationClassFactory() m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)() @@ -587,14 +591,15 @@ def eval_at(e2p, source_box_nr, rscale): p2p = P2P(target_kernels, exclude_self=False) fp = FieldPlotter(centers[:, -1], extent=0.3, npoints=res) 
- targets = fp.points + targets = actx.from_numpy(make_obj_array(fp.points)) # {{{ compute (direct) reference solution - evt, (pot_direct,) = p2p( + pot_direct = p2p( actx, targets, sources, (strengths,), - out_host=True, **extra_kwargs) + **extra_kwargs) + pot_direct = actx.to_numpy(pot_direct["result_s0"]) # }}} @@ -605,30 +610,28 @@ def eval_at(e2p, source_box_nr, rscale): # {{{ apply P2M - p2m_source_boxes = np.array([0], dtype=np.int32) + p2m_source_boxes = actx.zeros(1, dtype=np.int32) # These are indexed by global box numbers. - p2m_box_source_starts = np.array([0, 0, 0, 0], dtype=np.int32) - p2m_box_source_counts_nonchild = np.array([nsources, 0, 0, 0], - dtype=np.int32) + p2m_box_source_starts = actx.zeros(4, dtype=np.int32) + p2m_box_source_counts_nonchild = actx.from_numpy( + np.array([nsources, 0, 0, 0], dtype=np.int32)) - evt, (mpoles,) = p2m(actx, + mpoles = p2m(actx, source_boxes=p2m_source_boxes, box_source_starts=p2m_box_source_starts, box_source_counts_nonchild=p2m_box_source_counts_nonchild, - centers=centers, + centers=actx.from_numpy(centers), sources=sources, strengths=(strengths,), nboxes=nboxes, rscale=m1_rscale, - tgt_base_ibox=0, - - out_host=True, **extra_kwargs) + **extra_kwargs)["tgt_expansions"] # }}} - ntargets = targets.shape[-1] + ntargets = fp.points.shape[-1] pot = eval_at(m2p, 0, m1_rscale) @@ -639,11 +642,14 @@ def eval_at(e2p, source_box_nr, rscale): # {{{ apply M2M - m2m_target_boxes = np.array([1], dtype=np.int32) - m2m_src_box_starts = np.array([0, 1], dtype=np.int32) - m2m_src_box_lists = np.array([0], dtype=np.int32) + m2m_target_boxes = actx.from_numpy( + np.array([1], dtype=np.int32)) + m2m_src_box_starts = actx.from_numpy( + np.array([0, 1], dtype=np.int32)) + m2m_src_box_lists = actx.from_numpy( + np.array([0], dtype=np.int32)) - evt, (mpoles,) = m2m(actx, + mpoles = m2m(actx, src_expansions=mpoles, src_base_ibox=0, tgt_base_ibox=0, @@ -653,12 +659,12 @@ def eval_at(e2p, source_box_nr, rscale): 
src_box_starts=m2m_src_box_starts, src_box_lists=m2m_src_box_lists, - centers=centers, + centers=actx.from_numpy(centers), src_rscale=m1_rscale, tgt_rscale=m2_rscale, - out_host=True, **extra_kwargs) + **extra_kwargs)["tgt_expansions"] # }}} @@ -671,11 +677,14 @@ def eval_at(e2p, source_box_nr, rscale): # {{{ apply M2L - m2l_target_boxes = np.array([2], dtype=np.int32) - m2l_src_box_starts = np.array([0, 1], dtype=np.int32) - m2l_src_box_lists = np.array([1], dtype=np.int32) + m2l_target_boxes = actx.from_numpy( + np.array([2], dtype=np.int32)) + m2l_src_box_starts = actx.from_numpy( + np.array([0, 1], dtype=np.int32)) + m2l_src_box_lists = actx.from_numpy( + np.array([1], dtype=np.int32)) - evt, (mpoles,) = m2l(actx, + mpoles = m2l(actx, src_expansions=mpoles, src_base_ibox=0, tgt_base_ibox=0, @@ -684,12 +693,12 @@ def eval_at(e2p, source_box_nr, rscale): target_boxes=m2l_target_boxes, src_box_starts=m2l_src_box_starts, src_box_lists=m2l_src_box_lists, - centers=centers, + centers=actx.from_numpy(centers), src_rscale=m2_rscale, tgt_rscale=l1_rscale, - out_host=True, **extra_kwargs) + **extra_kwargs)["tgt_expansions"] # }}} @@ -702,11 +711,14 @@ def eval_at(e2p, source_box_nr, rscale): # {{{ apply L2L - l2l_target_boxes = np.array([3], dtype=np.int32) - l2l_src_box_starts = np.array([0, 1], dtype=np.int32) - l2l_src_box_lists = np.array([2], dtype=np.int32) + l2l_target_boxes = actx.from_numpy( + np.array([3], dtype=np.int32)) + l2l_src_box_starts = actx.from_numpy( + np.array([0, 1], dtype=np.int32)) + l2l_src_box_lists = actx.from_numpy( + np.array([2], dtype=np.int32)) - evt, (mpoles,) = l2l(actx, + mpoles = l2l(actx, src_expansions=mpoles, src_base_ibox=0, tgt_base_ibox=0, @@ -715,12 +727,12 @@ def eval_at(e2p, source_box_nr, rscale): target_boxes=l2l_target_boxes, src_box_starts=l2l_src_box_starts, src_box_lists=l2l_src_box_lists, - centers=centers, + centers=actx.from_numpy(centers), src_rscale=l1_rscale, tgt_rscale=l2_rscale, - out_host=True, **extra_kwargs) 
+ **extra_kwargs)["tgt_expansions"] # }}} @@ -938,7 +950,7 @@ def test_m2m_compressed_error_helmholtz(actx_factory, dim, order): center=second_center.reshape(dim), order=order, rscale=h) - m2m_vals[i] = mexp2.eval(targets) + m2m_vals[i] = mexp2.eval(actx, targets) err = np.linalg.norm(m2m_vals[1] - m2m_vals[0]) \ / np.linalg.norm(m2m_vals[1]) From 80b1045862d96109bdb8c89d576d2d9d8acd1dee Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 20:20:33 +0300 Subject: [PATCH 17/59] port qbx to arraycontext --- sumpy/array_context.py | 5 ++ sumpy/qbx.py | 159 ++++++++++++++++++++++++----------------- 2 files changed, 98 insertions(+), 66 deletions(-) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 92b07daa3..1aa0e4c02 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -85,6 +85,11 @@ def transform_loopy_program(self, t_unit): return t_unit + +def is_cl_cpu(actx: PyOpenCLArrayContext) -> bool: + import pyopencl as cl + return all(dev.type & cl.device_type.CPU for dev in actx.context.devices) + # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index de2598881..b476eb4f0 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -23,16 +23,14 @@ THE SOFTWARE. 
""" +from typing import Tuple, Union import numpy as np import loopy as lp -from loopy.version import MOST_RECENT_LANGUAGE_VERSION -import sumpy.symbolic as sym -from pytools import memoize_method -from pymbolic import parse, var -from sumpy.tools import ( - KernelComputation, KernelCacheWrapper, is_obj_array_like) +from pytools import memoize_method +from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program, is_cl_cpu +from sumpy.tools import KernelComputation, KernelCacheMixin, is_obj_array_like import logging logger = logging.getLogger(__name__) @@ -51,7 +49,7 @@ """ -def stringify_expn_index(i): +def stringify_expn_index(i: Union[Tuple[int, ...], int]) -> str: if isinstance(i, tuple): return "_".join(stringify_expn_index(i_i) for i_i in i) else: @@ -66,16 +64,19 @@ def stringify_expn_index(i): # {{{ base class -class LayerPotentialBase(KernelComputation, KernelCacheWrapper): - def __init__(self, ctx, expansion, strength_usage=None, - value_dtypes=None, name=None, device=None, +class LayerPotentialBase(KernelCacheMixin, KernelComputation): + def __init__(self, expansion, strength_usage=None, + value_dtypes=None, name=None, source_kernels=None, target_kernels=None): from pytools import single_valued - KernelComputation.__init__(self, ctx=ctx, target_kernels=target_kernels, - strength_usage=strength_usage, source_kernels=source_kernels, - value_dtypes=value_dtypes, name=name, device=device) + KernelComputation.__init__(self, + target_kernels=target_kernels, + source_kernels=source_kernels, + strength_usage=strength_usage, + value_dtypes=value_dtypes, + name=name) self.dim = single_valued(knl.dim for knl in self.target_kernels) self.expansion = expansion @@ -83,8 +84,7 @@ def __init__(self, ctx, expansion, strength_usage=None, def get_cache_key(self): return (type(self).__name__, self.expansion, tuple(self.target_kernels), tuple(self.source_kernels), tuple(self.strength_usage), - tuple(self.value_dtypes), - 
self.device.hashable_model_and_version_identifier) + tuple(self.value_dtypes)) def _expand(self, sac, avec, bvec, rscale, isrc): from sumpy.symbolic import PymbolicToSympyMapper @@ -108,6 +108,7 @@ def _evaluate(self, sac, avec, bvec, rscale, expansion_nr, coefficients): coefficients = [tgt_knl.postprocess_at_target(coeff, bvec) for coeff in coefficients] + import sumpy.symbolic as sym assigned_coeffs = [ sym.Symbol( sac.assign_unique( @@ -129,8 +130,10 @@ def get_loopy_insns_and_result_names(self): logger.info("compute expansion expressions: start") + import sumpy.symbolic as sym + import pymbolic as prim rscale = sym.Symbol("rscale") - isrc_sym = var("isrc") + isrc_sym = prim.var("isrc") coefficients = self._expand(sac, avec, bvec, rscale, isrc_sym) result_names = [self._evaluate(sac, avec, bvec, rscale, i, coefficients) @@ -155,10 +158,12 @@ def get_loopy_insns_and_result_names(self): return loopy_insns, result_names def get_strength_or_not(self, isrc, kernel_idx): - return var(f"strength_{self.strength_usage[kernel_idx]}_isrc") + import pymbolic as prim + return prim.var(f"strength_{self.strength_usage[kernel_idx]}_isrc") def get_kernel_exprs(self, result_names): - exprs = [var(name) for i, name in enumerate(result_names)] + import pymbolic as prim + exprs = [prim.var(name) for i, name in enumerate(result_names)] return [lp.Assignment(id=None, assignee=f"pair_result_{i}", @@ -184,12 +189,15 @@ def get_default_src_tgt_arguments(self): def get_kernel(self): raise NotImplementedError - def get_optimized_kernel(self, - targets_is_obj_array, sources_is_obj_array, centers_is_obj_array, + def get_optimized_kernel(self, *, + is_cpu: bool, + targets_is_obj_array: bool, + sources_is_obj_array: bool, + centers_is_obj_array: bool, # Used by pytential to override the name of the loop to be # parallelized. In the case of QBX, that's the loop over QBX # targets (not global targets). 
- itgt_name="itgt"): + itgt_name: str = "itgt"): # FIXME specialize/tune for GPU/CPU loopy_knl = self.get_kernel() @@ -200,9 +208,7 @@ def get_optimized_kernel(self, if centers_is_obj_array: loopy_knl = lp.tag_array_axes(loopy_knl, "center", "sep,C") - import pyopencl as cl - dev = self.context.devices[0] - if dev.type & cl.device_type.CPU: + if is_cpu: loopy_knl = lp.split_iname(loopy_knl, itgt_name, 16, outer_tag="g.0", inner_tag="l.0") loopy_knl = lp.split_iname(loopy_knl, "isrc", 256) @@ -210,8 +216,11 @@ def get_optimized_kernel(self, ["isrc_outer", f"{itgt_name}_inner"]) else: from warnings import warn - warn(f"don't know how to tune layer potential computation for '{dev}'") + warn( + "Do not know how to tune layer potential computation for " + "non-CPU targets") loopy_knl = lp.split_iname(loopy_knl, itgt_name, 128, outer_tag="g.0") + loopy_knl = self._allow_redundant_execution_of_knl_scaling(loopy_knl) return loopy_knl @@ -244,7 +253,7 @@ def get_kernel(self): for i in range(len(self.target_kernels)) ]) - loopy_knl = lp.make_kernel([""" + loopy_knl = make_loopy_program([""" {[itgt, isrc, idim]: \ 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ @@ -265,13 +274,13 @@ def get_kernel(self): """.format(i=iknl) for iknl in range(len(self.target_kernels))] + ["end"], - arguments, + kernel_data=arguments, name=self.name, - assumptions="ntargets>=1 and nsources>=1", - default_offset=lp.auto, silenced_warnings="write_race(write_lpot*)", - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "ntargets>=1 and nsources>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.target_kernels + self.source_kernels: @@ -279,7 +288,8 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, targets, sources, centers, strengths, expansion_radii, + def __call__(self, actx: PyOpenCLArrayContext, + targets, sources, 
centers, strengths, expansion_radii, **kwargs): """ :arg strengths: are required to have area elements and quadrature weights @@ -287,6 +297,7 @@ def __call__(self, queue, targets, sources, centers, strengths, expansion_radii, """ knl = self.get_cached_optimized_kernel( + is_cpu=is_cl_cpu(actx), targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) @@ -294,8 +305,10 @@ def __call__(self, queue, targets, sources, centers, strengths, expansion_radii, for i, dens in enumerate(strengths): kwargs[f"strength_{i}"] = dens - return knl(queue, sources=sources, targets=targets, center=centers, - expansion_radii=expansion_radii, **kwargs) + return actx.call_loopy( + knl, + sources=sources, targets=targets, center=centers, + expansion_radii=expansion_radii, **kwargs) # }}} @@ -322,7 +335,7 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes) ]) - loopy_knl = lp.make_kernel([""" + loopy_knl = make_loopy_program([""" {[itgt, isrc, idim]: \ 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ @@ -341,12 +354,12 @@ def get_kernel(self): """.format(i=iknl) for iknl in range(len(self.target_kernels))] + ["end"], - arguments, + kernel_data=arguments, name=self.name, - assumptions="ntargets>=1 and nsources>=1", - default_offset=lp.auto, - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "ntargets>=1 and nsources>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for expn in self.source_kernels + self.target_kernels: @@ -354,14 +367,18 @@ def get_kernel(self): return loopy_knl - def __call__(self, queue, targets, sources, centers, expansion_radii, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, + targets, sources, centers, expansion_radii, **kwargs): knl = self.get_cached_optimized_kernel( + is_cpu=is_cl_cpu(actx), 
targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) - return knl(queue, sources=sources, targets=targets, center=centers, - expansion_radii=expansion_radii, **kwargs) + return actx.call_loopy( + knl, + sources=sources, targets=targets, center=centers, + expansion_radii=expansion_radii, **kwargs) # }}} @@ -395,7 +412,7 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes) ]) - loopy_knl = lp.make_kernel([ + loopy_knl = make_loopy_program([ "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}" ], self.get_kernel_scaling_assignments() @@ -418,13 +435,13 @@ def get_kernel(self): """.format(i=iknl) for iknl in range(len(self.target_kernels))] + ["end"], - arguments, + kernel_data=arguments, name=self.name, - assumptions="nresult>=1", - default_offset=lp.auto, silenced_warnings="write_race(write_lpot*)", - fixed_parameters=dict(dim=self.dim), - lang_version=MOST_RECENT_LANGUAGE_VERSION) + ) + + loopy_knl = lp.assume(loopy_knl, "nresult>=1") + loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.add_dtypes(loopy_knl, @@ -450,8 +467,9 @@ def get_optimized_kernel(self, loopy_knl = self._allow_redundant_execution_of_knl_scaling(loopy_knl) return loopy_knl - def __call__(self, queue, targets, sources, centers, expansion_radii, - tgtindices, srcindices, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, + targets, sources, centers, expansion_radii, + tgtindices, srcindices, **kwargs): """Evaluate a subset of the QBX matrix interactions. 
:arg targets: target point coordinates, which can be an object @@ -478,13 +496,14 @@ def __call__(self, queue, targets, sources, centers, expansion_radii, sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) - return knl(queue, - sources=sources, - targets=targets, - center=centers, - expansion_radii=expansion_radii, - tgtindices=tgtindices, - srcindices=srcindices, **kwargs) + return actx.call_loopy( + knl, + sources=sources, + targets=targets, + center=centers, + expansion_radii=expansion_radii, + tgtindices=tgtindices, + srcindices=srcindices, **kwargs) # }}} @@ -602,80 +621,88 @@ def __init__(self, data_args, dim, density_var_name, @property @memoize_method def density(self): + import pymbolic as prim self.arguments[self.density_var_name] = \ lp.GlobalArg(self.density_var_name, self.density_dtype, shape="ntargets", order="C") - return parse(f"{self.density_var_name}[itgt]") + return prim.parse(f"{self.density_var_name}[itgt]") @property @memoize_method def density_prime(self): + import pymbolic as prim prime_var_name = f"{self.density_var_name}_prime" self.arguments[prime_var_name] = ( lp.GlobalArg(prime_var_name, self.density_dtype, shape="ntargets", order="C")) - return parse(f"{prime_var_name}[itgt]") + return prim.parse(f"{prime_var_name}[itgt]") @property @memoize_method def side(self): + import pymbolic as prim self.arguments["side"] = ( lp.GlobalArg("side", self.geometry_dtype, shape="ntargets")) - return parse("side[itgt]") + return prim.parse("side[itgt]") @property @memoize_method def normal(self): + import pymbolic as prim self.arguments["normal"] = ( lp.GlobalArg("normal", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) from pytools.obj_array import make_obj_array return make_obj_array([ - parse(f"normal[itgt, {i}]") + prim.parse(f"normal[itgt, {i}]") for i in range(self.dim)]) @property @memoize_method def tangent(self): + import pymbolic as prim self.arguments["tangent"] = ( 
lp.GlobalArg("tangent", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) from pytools.obj_array import make_obj_array return make_obj_array([ - parse(f"tangent[itgt, {i}]") + prim.parse(f"tangent[itgt, {i}]") for i in range(self.dim)]) @property @memoize_method def mean_curvature(self): + import pymbolic as prim self.arguments["mean_curvature"] = ( lp.GlobalArg("mean_curvature", self.geometry_dtype, shape="ntargets", order="C")) - return parse("mean_curvature[itgt]") + return prim.parse("mean_curvature[itgt]") @property @memoize_method def src_derivative_dir(self): + import pymbolic as prim self.arguments["src_derivative_dir"] = ( lp.GlobalArg("src_derivative_dir", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) from pytools.obj_array import make_obj_array return make_obj_array([ - parse(f"src_derivative_dir[itgt, {i}]") + prim.parse(f"src_derivative_dir[itgt, {i}]") for i in range(self.dim)]) @property @memoize_method def tgt_derivative_dir(self): + import pymbolic as prim self.arguments["tgt_derivative_dir"] = ( lp.GlobalArg("tgt_derivative_dir", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) from pytools.obj_array import make_obj_array return make_obj_array([ - parse(f"tgt_derivative_dir[itgt, {i}]") + prim.parse(f"tgt_derivative_dir[itgt, {i}]") for i in range(self.dim)]) # }}} From 81b92bd7b6a3aa71fe09d0c2876e061eda262272 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 20:20:44 +0300 Subject: [PATCH 18/59] port curve-pot to arraycontext --- examples/curve-pot.py | 47 +++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/examples/curve-pot.py b/examples/curve-pot.py index 8d1a91a21..3ac66a276 100644 --- a/examples/curve-pot.py +++ b/examples/curve-pot.py @@ -91,7 +91,8 @@ def draw_pot_figure(aspect_ratio, knl_kwargs = {} vol_source_knl, vol_target_knl = process_kernel(knl, what_operator) - p2p = P2P(actx.context, 
source_kernels=(vol_source_knl,), + p2p = P2P( + source_kernels=(vol_source_knl,), target_kernels=(vol_target_knl,), exclude_self=False, value_dtypes=np.complex128) @@ -99,7 +100,7 @@ def draw_pot_figure(aspect_ratio, lpot_source_knl, lpot_target_knl = process_kernel(knl, what_operator_lpot) from sumpy.qbx import LayerPotential - lpot = LayerPotential(actx.context, + lpot = LayerPotential( expansion=expn_class(knl, order=order), source_kernels=(lpot_source_knl,), target_kernels=(lpot_target_knl,), @@ -157,13 +158,13 @@ def map_to_curve(t): lpot_kwargs = knl_kwargs.copy() if what_operator == "D": - volpot_kwargs["src_derivative_dir"] = native_curve.normal + volpot_kwargs["src_derivative_dir"] = actx.from_numpy(native_curve.normal) if what_operator_lpot == "D": - lpot_kwargs["src_derivative_dir"] = ovsmp_curve.normal + lpot_kwargs["src_derivative_dir"] = actx.from_numpy(ovsmp_curve.normal) if what_operator_lpot == "S'": - lpot_kwargs["tgt_derivative_dir"] = native_curve.normal + lpot_kwargs["tgt_derivative_dir"] = actx.from_numpy(native_curve.normal) # }}} @@ -177,7 +178,7 @@ def map_to_curve(t): def apply_lpot(x): xovsmp = np.dot(fim, x) - evt, (y,) = lpot(actx.queue, + evt, (y,) = lpot(actx, native_curve.pos, ovsmp_curve.pos, centers, @@ -198,21 +199,33 @@ def apply_lpot(x): # {{{ compute potentials + targets = actx.from_numpy(native_curve.pos) + sources = actx.from_numpy(fp.points) + mode_nr = 0 density = np.cos(mode_nr*2*np.pi*native_t).astype(np.complex128) + strengths = actx.from_numpy(native_curve.speed * native_weights * density) + + result = p2p(actx, + sources, + targets, + [strengths], **volpot_kwargs) + vol_pot = actx.to_numpy(result["result_s0"]) + + ovsmp_targets = actx.from_numpy(ovsmp_curve.pos) + ovsmp_density = np.cos(mode_nr*2*np.pi*ovsmp_t).astype(np.complex128) - evt, (vol_pot,) = p2p(actx.queue, - fp.points, - native_curve.pos, - [native_curve.speed*native_weights*density], **volpot_kwargs) - - evt, (curve_pot,) = lpot(actx.queue, - 
native_curve.pos, - ovsmp_curve.pos, - centers, - [ovsmp_density * ovsmp_curve.speed * ovsmp_weights], - expansion_radii=np.ones(centers.shape[1]), + ovsmp_strengths = actx.from_numpy( + ovsmp_curve.speed * ovsmp_weights * ovsmp_density) + + result = lpot(actx, + targets, + ovsmp_targets, + actx.from_numpy(centers), + [ovsmp_strengths], + expansion_radii=actx.from_numpy(np.ones(centers.shape[1])), **lpot_kwargs) + curve_pot = actx.to_numpy(result["result_0"]) # }}} From 5cc19191174c364c7b91fd1bd4d0a1780c3c0562 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 20:22:53 +0300 Subject: [PATCH 19/59] add pytools to intersphinx --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index b69774dd9..cfaa8983e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -18,6 +18,7 @@ intersphinx_mapping = { "https://docs.python.org/3/": None, "https://numpy.org/doc/stable/": None, + "https://documen.tician.de/pytools/": None, "https://documen.tician.de/modepy/": None, "https://documen.tician.de/pyopencl/": None, "https://documen.tician.de/pymbolic/": None, From 04bb78a7127f4b724d44b5470c9d3e485a7b5bec Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 20:36:04 +0300 Subject: [PATCH 20/59] add assumptions at kernel creation --- sumpy/array_context.py | 2 ++ sumpy/e2e.py | 15 +++++++-------- sumpy/e2p.py | 4 ++-- sumpy/p2e.py | 4 ++-- sumpy/p2p.py | 9 ++++----- sumpy/qbx.py | 7 +++---- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 1aa0e4c02..70ace786a 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -46,6 +46,7 @@ def make_loopy_program( kernel_data: Optional[List[Any]] = None, *, name: str = "sumpy_loopy_kernel", silenced_warnings: Optional[Union[List[str], str]] = None, + assumptions: Optional[Union[List[str], str]] = None, index_dtype: Optional["np.dtype"] = None, tags: ToTagSetConvertible = None): """Return a 
:class:`loopy.LoopKernel` suitable for use with @@ -68,6 +69,7 @@ def make_loopy_program( default_offset=lp.auto, name=name, lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, + assumptions=assumptions, silenced_warnings=silenced_warnings, index_dtype=index_dtype, tags=tags) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 3609d358a..57669aeeb 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -224,9 +224,9 @@ def get_kernel(self): self.tgt_expansion]), name=self.name, silenced_warnings="write_race(write_expn*)", + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -451,9 +451,9 @@ def get_kernel(self, result_dtype): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, m2l_translation_classes_dependent_ndata=( @@ -553,9 +553,9 @@ def get_kernel(self, result_dtype): ... ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, + assumptions="ntranslation_classes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntranslation_classes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, m2l_translation_classes_dependent_ndata=( @@ -654,9 +654,9 @@ def get_kernel(self, result_dtype): ... ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, + assumptions="nsrc_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "nsrc_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, nsrc_coeffs=nsrc_coeffs, npreprocessed_src_coeffs=npreprocessed_src_coeffs) @@ -739,9 +739,9 @@ def get_kernel(self, result_dtype): ... 
] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nsrc_coeffs=ntgt_coeffs_before_postprocessing, @@ -857,9 +857,9 @@ def get_kernel(self): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, silenced_warnings="write_race(write_expn*)", + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nchildren=2**self.dim) for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: @@ -960,11 +960,10 @@ def get_kernel(self): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, silenced_warnings="write_race(write_expn*)", + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nchildren=2**self.dim) - for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index e7694b912..deb43ddfb 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -177,10 +177,10 @@ def get_kernel(self): ... ] + [arg.loopy_arg for arg in self.expansion.get_args()], name=self.name, + assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_result*)", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nresults=len(result_names)) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -290,10 +290,10 @@ def get_kernel(self): ... 
] + [arg.loopy_arg for arg in self.expansion.get_args()], name=self.name, + assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_result*)", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nresults=len(result_names)) diff --git a/sumpy/p2e.py b/sumpy/p2e.py index b01b255a2..fb4c5bfad 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -211,9 +211,9 @@ def get_kernel(self): self.source_kernels + (self.expansion,)), name=self.name, silenced_warnings="write_race(write_expn*)", + assumptions="nsrc_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "nsrc_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, strength_count=self.strength_count) @@ -331,9 +331,9 @@ def get_kernel(self): kernel_data=arguments, name=self.name, silenced_warnings="write_race(write_expn*)", + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, strength_count=self.strength_count) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e92689bde..f3ae9d1e4 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -237,9 +237,9 @@ def get_kernel(self): + ["end"], kernel_data=arguments, name=self.name, + assumptions="nsources>=1 and ntargets>=1", ) - loopy_knl = lp.assume(loopy_knl, "nsources>=1 and ntargets>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nstrengths=self.strength_count, @@ -303,9 +303,9 @@ def get_kernel(self): + ["end"], arguments, name=self.name, + assumptions="nsources>=1 and ntargets>=1", ) - loopy_knl = lp.assume(loopy_knl, "nsources>=1 and ntargets>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -380,11 +380,10 @@ def get_kernel(self): arguments, silenced_warnings="write_race(write_p2p*)", name=self.name, + assumptions="nresult>=1", ) - loopy_knl = lp.assume(loopy_knl, "nresult>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) - loopy_knl = 
lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) @@ -624,9 +623,9 @@ def get_kernel(self, "write_race(write_csr*)", "write_race(prefetch_src)", "write_race(prefetch_charge)"], + assumptions="ntgt_boxes>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntgt_boxes>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nstrengths=self.strength_count, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index b476eb4f0..95c741409 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -277,9 +277,9 @@ def get_kernel(self): kernel_data=arguments, name=self.name, silenced_warnings="write_race(write_lpot*)", + assumptions="ntargets>=1 and nsources>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntargets>=1 and nsources>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -356,9 +356,9 @@ def get_kernel(self): + ["end"], kernel_data=arguments, name=self.name, + assumptions="ntargets>=1 and nsources>=1", ) - loopy_knl = lp.assume(loopy_knl, "ntargets>=1 and nsources>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") @@ -438,11 +438,10 @@ def get_kernel(self): kernel_data=arguments, name=self.name, silenced_warnings="write_race(write_lpot*)", + assumptions="nresult>=1", ) - loopy_knl = lp.assume(loopy_knl, "nresult>=1") loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) From dd9e7d9c6881e6cebf37db65eb8a9c0ea59703d5 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 5 Sep 2022 20:40:05 +0300 Subject: [PATCH 21/59] port expansion-toys to arraycontext --- examples/expansion-toys.py | 15 +++++++-------- sumpy/toys.py | 14 +++++++------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/examples/expansion-toys.py b/examples/expansion-toys.py index 
48647d0ee..cb8f7165e 100644 --- a/examples/expansion-toys.py +++ b/examples/expansion-toys.py @@ -23,7 +23,6 @@ def main(): actx = PyOpenCLArrayContext(queue, force_device_scalars=True) tctx = t.ToyContext( - actx.context, # LaplaceKernel(2), YukawaKernel(2), extra_kernel_kwargs={"lam": 5}, # HelmholtzKernel(2), extra_kernel_kwargs={"k": 0.3}, @@ -37,22 +36,22 @@ def main(): fp = FieldPlotter([3, 0], extent=8) if USE_MATPLOTLIB: - t.logplot(fp, pt_src, cmap="jet") + t.logplot(actx, fp, pt_src, cmap="jet") plt.colorbar() plt.show() - mexp = t.multipole_expand(pt_src, [0, 0], 5) - mexp2 = t.multipole_expand(mexp, [0, 0.25]) # noqa: F841 - lexp = t.local_expand(mexp, [3, 0]) - lexp2 = t.local_expand(lexp, [3, 1], 3) + mexp = t.multipole_expand(actx, pt_src, [0, 0], order=5) + mexp2 = t.multipole_expand(actx, mexp, [0, 0.25]) # noqa: F841 + lexp = t.local_expand(actx, mexp, [3, 0]) + lexp2 = t.local_expand(actx, lexp, [3, 1], order=3) # diff = mexp - pt_src # diff = mexp2 - pt_src diff = lexp2 - pt_src - print(t.l_inf(diff, 1.2, center=lexp2.center)) + print(t.l_inf(actx, diff, 1.2, center=lexp2.center)) if USE_MATPLOTLIB: - t.logplot(fp, diff, cmap="jet", vmin=-3, vmax=0) + t.logplot(actx, fp, diff, cmap="jet", vmin=-3, vmax=0) plt.colorbar() plt.show() diff --git a/sumpy/toys.py b/sumpy/toys.py index 959394fe0..6d7757e04 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -357,7 +357,7 @@ def __init__(self, toy_ctx, value): super().__init__(toy_ctx) self.value = np.array(value) - def eval(self, targets): + def eval(self, actx: PyOpenCLArrayContext, targets): pot = np.empty(targets.shape[-1], dtype=self.value.dtype) pot.fill(self.value) return pot @@ -372,7 +372,7 @@ def __init__(self, toy_ctx, center, radius): self.center = np.asarray(center) self.radius = radius - def eval(self, targets): + def eval(self, actx: PyOpenCLArrayContext, targets): dist_vec = targets - self.center[:, np.newaxis] return (np.sum(dist_vec**2, axis=0) < self.radius**2).astype(np.float64) @@ 
-387,7 +387,7 @@ def __init__(self, toy_ctx, center, axis, side=1): self.axis = axis self.side = side - def eval(self, targets): + def eval(self, actx: PyOpenCLArrayContext, targets): return ( (self.side*(targets[self.axis] - self.center[self.axis])) >= 0 ).astype(np.float64) @@ -563,9 +563,9 @@ def local_expand( raise TypeError(f"do not know how to expand '{type(psource).__name__}'") -def logplot(fp, psource, **kwargs): +def logplot(actx, fp, psource, **kwargs): fp.show_scalar_in_matplotlib( - np.log10(np.abs(psource.eval(fp.points) + 1e-15)), **kwargs) + np.log10(np.abs(psource.eval(actx, fp.points) + 1e-15)), **kwargs) def combine_inner_outer(psource_inner, psource_outer, radius, center=None): @@ -603,7 +603,7 @@ def combine_halfspace_and_outer(psource_pos, psource_neg, psource_outer, psource_outer, radius, center) -def l_inf(psource, radius, center=None, npoints=100, debug=False): +def l_inf(actx, psource, radius, center=None, npoints=100, debug=False): if center is None: center = psource.center @@ -611,7 +611,7 @@ def l_inf(psource, radius, center=None, npoints=100, debug=False): from sumpy.visualization import FieldPlotter fp = FieldPlotter(center, extent=2*radius, npoints=npoints) - z = restr.eval(fp.points) + z = restr.eval(actx, fp.points) if debug: fp.show_scalar_in_matplotlib( From 5bba7e27653e4c446f4edc9a83be19d0145a83de Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 6 Sep 2022 14:50:20 +0300 Subject: [PATCH 22/59] move get_kernel calls to separate line for debugging --- sumpy/e2e.py | 37 ++++++++++++++++++++++++++----------- sumpy/e2p.py | 12 +++++++----- sumpy/p2e.py | 13 +++++++------ sumpy/p2p.py | 10 +++++----- sumpy/qbx.py | 8 ++++---- 5 files changed, 49 insertions(+), 31 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 57669aeeb..6fa1754cc 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -44,7 +44,7 @@ """ -# {{{ translation base class +# {{{ E2EBase: base class class E2EBase(KernelCacheMixin): def __init__(self, 
src_expansion, tgt_expansion, name=None): @@ -123,7 +123,7 @@ def get_optimized_kernel(self): # }}} -# {{{ translation from "compressed sparse row"-like source box lists +# {{{ E2EFromCSR: translation from "compressed sparse row"-like source box lists class E2EFromCSR(E2EBase): """Implements translation from a "compressed sparse row"-like source box @@ -260,12 +260,17 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) + knl = self.get_cached_optimized_kernel() return actx.call_loopy( - self.get_cached_optimized_kernel(), + knl, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) +# }}} + + +# {{{ M2LUsingTranslationClassesDependentData class M2LUsingTranslationClassesDependentData(E2EFromCSR): """Implements translation from a "compressed sparse row"-like source box @@ -496,13 +501,18 @@ def __call__(self, actx, **kwargs): tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) src_expansions = kwargs.pop("src_expansions") + knl = self.get_cached_optimized_kernel(result_dtype=src_expansions.dtype) return actx.call_loopy( - self.get_cached_optimized_kernel(result_dtype=src_expansions.dtype), + knl, src_expansions=src_expansions, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) +# }}} + + +# {{{ M2LGenerateTranslationClassesDependentData class M2LGenerateTranslationClassesDependentData(E2EBase): """Implements precomputing the M2L kernel dependent data which are @@ -600,8 +610,9 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): "m2l_translation_classes_dependent_data") result_dtype = m2l_translation_classes_dependent_data.dtype + knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) return actx.call_loopy( - self.get_cached_optimized_kernel(result_dtype=result_dtype), + knl, src_rscale=src_rscale, m2l_translation_vectors=m2l_translation_vectors, 
m2l_translation_classes_dependent_data=( @@ -685,8 +696,9 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): preprocessed_src_expansions = kwargs.pop("preprocessed_src_expansions") result_dtype = preprocessed_src_expansions.dtype + knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) return actx.call_loopy( - self.get_cached_optimized_kernel(result_dtype=result_dtype), + knl, preprocessed_src_expansions=preprocessed_src_expansions, **kwargs) @@ -771,15 +783,16 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): tgt_expansions = kwargs.pop("tgt_expansions") result_dtype = tgt_expansions.dtype + knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) return actx.call_loopy( - self.get_cached_optimized_kernel(result_dtype=result_dtype), + knl, tgt_expansions=tgt_expansions, **kwargs) # }}} -# {{{ translation from a box's children +# {{{ E2EFromChildren: translation from a box's children class E2EFromChildren(E2EBase): default_name = "e2e_from_children" @@ -887,8 +900,9 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) + knl = self.get_cached_optimized_kernel() return actx.call_loopy( - self.get_cached_optimized_kernel(), + knl, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) @@ -896,7 +910,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # }}} -# {{{ translation from a box's parent +# {{{ E2EFromParent: translation from a box's parent class E2EFromParent(E2EBase): default_name = "e2e_from_parent" @@ -989,8 +1003,9 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) + knl = self.get_cached_optimized_kernel() return actx.call_loopy( - self.get_cached_optimized_kernel(), + knl, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, 
**kwargs) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index deb43ddfb..936651a02 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -39,7 +39,7 @@ """ -# {{{ E2P base class +# {{{ E2PBase: base class class E2PBase(KernelCacheMixin): def __init__(self, expansion, kernels, name=None): @@ -119,7 +119,7 @@ def get_cache_key(self): # }}} -# {{{ E2P to single box (L2P, likely) +# {{{ E2PFromSingleBox: E2P to single box (L2P, likely) class E2PFromSingleBox(E2PBase): default_name = "e2p_from_single_box" @@ -215,14 +215,15 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # meaningfully inferred. Make the type of rscale explicit. rscale = centers.dtype.type(kwargs.pop("rscale")) + knl = self.get_cached_optimized_kernel() return actx.call_loopy( - self.get_cached_optimized_kernel(), + knl, centers=centers, rscale=rscale, **kwargs) # }}} -# {{{ E2P from CSR-like interaction list +# {{{ E2PFromCSR: E2P from CSR-like interaction list class E2PFromCSR(E2PBase): default_name = "e2p_from_csr" @@ -321,8 +322,9 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # meaningfully inferred. Make the type of rscale explicit. rscale = centers.dtype.type(kwargs.pop("rscale")) + knl = self.get_cached_optimized_kernel() return actx.call_loopy( - self.get_cached_optimized_kernel(), + knl, centers=centers, rscale=rscale, **kwargs) # }}} diff --git a/sumpy/p2e.py b/sumpy/p2e.py index fb4c5bfad..8c566808f 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -41,7 +41,7 @@ """ -# {{{ P2E base class +# {{{ P2EBase: base class class P2EBase(KernelCacheMixin, KernelComputation): """Common input processing for kernel computations. 
@@ -138,15 +138,16 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): from sumpy.tools import is_obj_array_like sources = kwargs.pop("sources") centers = kwargs.pop("centers") - knl = self.get_cached_optimized_kernel( - sources_is_obj_array=is_obj_array_like(sources), - centers_is_obj_array=is_obj_array_like(centers)) # "1" may be passed for rscale, which won't have its type # meaningfully inferred. Make the type of rscale explicit. dtype = centers[0].dtype if is_obj_array_like(centers) else centers.dtype rscale = dtype.type(kwargs.pop("rscale")) + knl = self.get_cached_optimized_kernel( + sources_is_obj_array=is_obj_array_like(sources), + centers_is_obj_array=is_obj_array_like(centers)) + return actx.call_loopy( knl, sources=sources, centers=centers, rscale=rscale, @@ -155,7 +156,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # }}} -# {{{ P2E from single box (P2M, likely) +# {{{ P2EFromSingleBox: P2E from single box (P2M, likely) class P2EFromSingleBox(P2EBase): """ @@ -258,7 +259,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # }}} -# {{{ P2E from CSR-like interaction list +# {{{ P2EFromCSR: P2E from CSR-like interaction list class P2EFromCSR(P2EBase): """ diff --git a/sumpy/p2p.py b/sumpy/p2p.py index f3ae9d1e4..5a9ae704f 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -50,7 +50,7 @@ # LATER: # - Optimization for source == target (postpone) -# {{{ p2p base class +# {{{ P2PBase: base class class P2PBase(KernelCacheMixin, KernelComputation): def __init__(self, target_kernels, exclude_self, strength_usage=None, @@ -198,7 +198,7 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): # }}} -# {{{ P2P point-interaction calculation +# {{{ P2P: point-interaction calculation class P2P(P2PBase): """Direct applier for P2P interactions.""" @@ -265,7 +265,7 @@ def __call__(self, actx: PyOpenCLArrayContext, # }}} -# {{{ P2P matrix writer +# {{{ P2PMatrixGenerator: matrix writer class 
P2PMatrixGenerator(P2PBase): """Generator for P2P interaction matrix entries.""" @@ -324,7 +324,7 @@ def __call__(self, actx: PyOpenCLArrayContext, targets, sources, **kwargs): # }}} -# {{{ P2P matrix subset generator +# {{{ P2PMatrixSubsetGenerator: matrix subset generator class P2PMatrixSubsetGenerator(P2PBase): """Generator for a subset of P2P interaction matrix entries. @@ -439,7 +439,7 @@ def __call__(self, actx: PyOpenCLArrayContext, # }}} -# {{{ P2P from CSR-like interaction list +# {{{ P2PFromCSR: P2P from CSR-like interaction list class P2PFromCSR(P2PBase): default_name = "p2p_from_csr" diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 95c741409..a5eff1581 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -62,7 +62,7 @@ def stringify_expn_index(i: Union[Tuple[int, ...], int]) -> str: # {{{ layer potential computation -# {{{ base class +# {{{ LayerPotentialBase: base class class LayerPotentialBase(KernelCacheMixin, KernelComputation): def __init__(self, expansion, strength_usage=None, @@ -228,7 +228,7 @@ def get_optimized_kernel(self, *, # }}} -# {{{ direct applier +# {{{ LayerPotential: direct applier class LayerPotential(LayerPotentialBase): """Direct applier for the layer potential. @@ -313,7 +313,7 @@ def __call__(self, actx: PyOpenCLArrayContext, # }}} -# {{{ matrix writer +# {{{ LayerPotentialMatrixGenerator: matrix writer class LayerPotentialMatrixGenerator(LayerPotentialBase): """Generator for layer potential matrix entries.""" @@ -383,7 +383,7 @@ def __call__(self, actx: PyOpenCLArrayContext, # }}} -# {{{ matrix subset generator +# {{{ LayerPotentialMatrixSubsetGenerator: matrix subset generator class LayerPotentialMatrixSubsetGenerator(LayerPotentialBase): """Generator for a subset of the layer potential matrix entries. 
From 2332fd5afd1dc40dfac588665db23b011cd923b3 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 6 Sep 2022 15:12:44 +0300 Subject: [PATCH 23/59] add fixed_parameters to make_loopy_program --- sumpy/array_context.py | 4 +++- sumpy/e2e.py | 46 ++++++++++++++++++------------------------ sumpy/e2p.py | 8 ++------ sumpy/p2e.py | 12 +++++------ sumpy/p2p.py | 27 ++++++++++++------------- sumpy/qbx.py | 8 +++----- sumpy/tools.py | 4 +--- 7 files changed, 48 insertions(+), 61 deletions(-) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 70ace786a..a369b56bb 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -20,7 +20,7 @@ THE SOFTWARE. """ -from typing import Any, List, Optional, Union +from typing import Any, Dict, List, Optional, Union import numpy as np @@ -47,6 +47,7 @@ def make_loopy_program( name: str = "sumpy_loopy_kernel", silenced_warnings: Optional[Union[List[str], str]] = None, assumptions: Optional[Union[List[str], str]] = None, + fixed_parameters: Optional[Dict[str, Any]] = None, index_dtype: Optional["np.dtype"] = None, tags: ToTagSetConvertible = None): """Return a :class:`loopy.LoopKernel` suitable for use with @@ -70,6 +71,7 @@ def make_loopy_program( name=name, lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, assumptions=assumptions, + fixed_parameters=fixed_parameters, silenced_warnings=silenced_warnings, index_dtype=index_dtype, tags=tags) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 6fa1754cc..142f01dce 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -225,10 +225,9 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + fixed_parameters=dict(dim=self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") @@ -457,15 +456,14 @@ def get_kernel(self, result_dtype): self.tgt_expansion]), name=self.name, 
assumptions="ntgt_boxes>=1", + fixed_parameters=dict( + dim=self.dim, + m2l_translation_classes_dependent_ndata=( + m2l_translation_classes_dependent_ndata), + ncoeff_tgt=ncoeff_tgt, + ncoeff_src=ncoeff_src), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - m2l_translation_classes_dependent_ndata=( - m2l_translation_classes_dependent_ndata), - ncoeff_tgt=ncoeff_tgt, - ncoeff_src=ncoeff_src) - loopy_knl = lp.merge([translation_knl, loopy_knl]) loopy_knl = lp.inline_callable_kernel(loopy_knl, "e2e") loopy_knl = lp.add_dependency( @@ -564,13 +562,12 @@ def get_kernel(self, result_dtype): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, assumptions="ntranslation_classes>=1", + fixed_parameters=dict( + dim=self.dim, + m2l_translation_classes_dependent_ndata=( + m2l_translation_classes_dependent_ndata)), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - m2l_translation_classes_dependent_ndata=( - m2l_translation_classes_dependent_ndata)) - for expr_knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expr_knl.prepare_loopy_kernel(loopy_knl) @@ -666,12 +663,11 @@ def get_kernel(self, result_dtype): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, assumptions="nsrc_boxes>=1", + fixed_parameters=dict( + nsrc_coeffs=nsrc_coeffs, + npreprocessed_src_coeffs=npreprocessed_src_coeffs), ) - loopy_knl = lp.fix_parameters(loopy_knl, - nsrc_coeffs=nsrc_coeffs, - npreprocessed_src_coeffs=npreprocessed_src_coeffs) - for expn in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -752,13 +748,12 @@ def get_kernel(self, result_dtype): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, assumptions="ntgt_boxes>=1", + fixed_parameters=dict( + dim=self.dim, + nsrc_coeffs=ntgt_coeffs_before_postprocessing, + ntgt_coeffs=ntgt_coeffs), ) - loopy_knl = lp.fix_parameters(loopy_knl, - 
dim=self.dim, - nsrc_coeffs=ntgt_coeffs_before_postprocessing, - ntgt_coeffs=ntgt_coeffs) - for expn in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -871,10 +866,9 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nchildren=2**self.dim) - for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -975,9 +969,9 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim, nchildren=2**self.dim) for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 936651a02..c3ad116c1 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -179,10 +179,9 @@ def get_kernel(self): name=self.name, assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_result*)", + fixed_parameters=dict(dim=self.dim, nresults=len(result_names)), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, nresults=len(result_names)) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.kernels: @@ -293,12 +292,9 @@ def get_kernel(self): name=self.name, assumptions="ntgt_boxes>=1", silenced_warnings="write_race(write_result*)", + fixed_parameters=dict(dim=self.dim, nresults=len(result_names)), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - nresults=len(result_names)) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.prioritize_loops(loopy_knl, "itgt_box,itgt,isrc_box") diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 8c566808f..8a0e54a2a 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py 
@@ -213,11 +213,11 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_expn*)", assumptions="nsrc_boxes>=1", + fixed_parameters=dict( + dim=self.dim, + strength_count=self.strength_count), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - strength_count=self.strength_count) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.source_kernels: @@ -333,11 +333,11 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + fixed_parameters=dict( + dim=self.dim, + strength_count=self.strength_count), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - strength_count=self.strength_count) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.source_kernels: diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 5a9ae704f..c2ec45eed 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -238,12 +238,12 @@ def get_kernel(self): kernel_data=arguments, name=self.name, assumptions="nsources>=1 and ntargets>=1", + fixed_parameters=dict( + dim=self.dim, + nstrengths=self.strength_count, + nresults=len(self.target_kernels)), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - nstrengths=self.strength_count, - nresults=len(self.target_kernels)) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.target_kernels + self.source_kernels: @@ -304,9 +304,9 @@ def get_kernel(self): arguments, name=self.name, assumptions="nsources>=1 and ntargets>=1", + fixed_parameters=dict(dim=self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.target_kernels + self.source_kernels: @@ -381,9 +381,9 @@ def get_kernel(self): silenced_warnings="write_race(write_p2p*)", name=self.name, assumptions="nresult>=1", + fixed_parameters=dict(dim=self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = 
lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) @@ -624,16 +624,15 @@ def get_kernel(self, "write_race(prefetch_src)", "write_race(prefetch_charge)"], assumptions="ntgt_boxes>=1", + fixed_parameters=dict( + dim=self.dim, + nstrengths=self.strength_count, + nsplit=nsplit, + src_outer_limit=src_outer_limit, + tgt_outer_limit=tgt_outer_limit, + noutputs=len(self.target_kernels)), ) - loopy_knl = lp.fix_parameters(loopy_knl, - dim=self.dim, - nstrengths=self.strength_count, - nsplit=nsplit, - src_outer_limit=src_outer_limit, - tgt_outer_limit=tgt_outer_limit, - noutputs=len(self.target_kernels)) - loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) diff --git a/sumpy/qbx.py b/sumpy/qbx.py index a5eff1581..e44631e89 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -278,10 +278,9 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_lpot*)", assumptions="ntargets>=1 and nsources>=1", + fixed_parameters=dict(dim=self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for knl in self.target_kernels + self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -357,10 +356,9 @@ def get_kernel(self): kernel_data=arguments, name=self.name, assumptions="ntargets>=1 and nsources>=1", + fixed_parameters=dict(dim=self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") for expn in self.source_kernels + self.target_kernels: loopy_knl = expn.prepare_loopy_kernel(loopy_knl) @@ -439,9 +437,9 @@ def get_kernel(self): name=self.name, silenced_warnings="write_race(write_lpot*)", assumptions="nresult>=1", + fixed_parameters=dict(dim=self.dim), ) - loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.add_dtypes(loopy_knl, dict(nsources=np.int32, ntargets=np.int32)) diff --git a/sumpy/tools.py b/sumpy/tools.py 
index 61792c1d3..899e9a3ae 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -1110,11 +1110,9 @@ def loopy_fft(shape, inverse, complex_dtype, index_dtype=None, domains, insns, kernel_data=kernel_data, name=name, + fixed_parameters=fixed_parameters, ) - # FIXME: set index_dtype? - knl = lp.fix_parameters(knl, **fixed_parameters) - if broadcast_dims: knl = lp.split_iname(knl, "j0", 32, inner_tag="l.0", outer_tag="g.0") knl = lp.add_inames_for_unused_hw_axes(knl) From 6da8704a6f3e856db294f62a2c9178a1919818dd Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 6 Sep 2022 15:42:09 +0300 Subject: [PATCH 24/59] port test_qbx to arraycontext --- sumpy/tools.py | 3 +-- test/test_qbx.py | 53 ++++++++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 899e9a3ae..74ae92c27 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -53,7 +53,7 @@ from arraycontext import Array import sumpy.symbolic as sym -from sumpy.array_context import PyOpenCLArrayContext +from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program import logging logger = logging.getLogger(__name__) @@ -1105,7 +1105,6 @@ def loopy_fft(shape, inverse, complex_dtype, index_dtype=None, else: name = f"fft_{n}" - from arraycontext import make_loopy_program knl = make_loopy_program( domains, insns, kernel_data=kernel_data, diff --git a/test/test_qbx.py b/test/test_qbx.py index c38af078b..2860dfb85 100644 --- a/test/test_qbx.py +++ b/test/test_qbx.py @@ -63,7 +63,7 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False): from sumpy.qbx import LayerPotential - lpot = LayerPotential(actx, + lpot = LayerPotential( expansion=expn_class(lknl, order), target_kernels=(lknl,), source_kernels=(lknl,)) @@ -75,11 +75,10 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False): eocrec = EOCRecorder() for n in [200, 300, 400]: - t = np.linspace(0, 2 * np.pi, n, endpoint=False) - 
unit_circle = np.exp(1j * t) - unit_circle = np.array([unit_circle.real, unit_circle.imag]) + t = actx.from_numpy(np.linspace(0, 2 * np.pi, n, endpoint=False)) + unit_circle = actx.np.stack([actx.np.cos(t), actx.np.sin(t)]) - sigma = np.cos(mode_nr * t) + sigma = actx.np.cos(mode_nr * t) eigval = 1/(2*mode_nr) result_ref = eigval * sigma @@ -91,16 +90,17 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False): radius = 7 * h centers = unit_circle * (1 - radius) - - expansion_radii = np.ones(n) * radius + expansion_radii = actx.from_numpy(np.full(n, radius)) strengths = (sigma * h,) - evt, (result_qbx,) = lpot( + result_qbx = lpot( actx, targets, sources, centers, strengths, - expansion_radii=expansion_radii) + expansion_radii=expansion_radii)["result_0"] - eocrec.add_data_point(h, np.max(np.abs(result_ref - result_qbx))) + error = actx.to_numpy( + actx.np.linalg.norm(result_ref - result_qbx, np.inf)) + eocrec.add_data_point(h, error) logger.info("eoc:\n%s", eocrec) @@ -133,10 +133,14 @@ def test_direct_qbx_vs_eigval_with_tgt_deriv( from sumpy.qbx import LayerPotential - lpot_dx = LayerPotential(actx, expansion=expn_class(lknl, order), - target_kernels=(AxisTargetDerivative(0, lknl),), source_kernels=(lknl,)) - lpot_dy = LayerPotential(actx, expansion=expn_class(lknl, order), - target_kernels=(AxisTargetDerivative(1, lknl),), source_kernels=(lknl,)) + lpot_dx = LayerPotential( + expansion=expn_class(lknl, order), + target_kernels=(AxisTargetDerivative(0, lknl),), + source_kernels=(lknl,)) + lpot_dy = LayerPotential( + expansion=expn_class(lknl, order), + target_kernels=(AxisTargetDerivative(1, lknl),), + source_kernels=(lknl,)) mode_nr = 15 @@ -145,11 +149,10 @@ def test_direct_qbx_vs_eigval_with_tgt_deriv( eocrec = EOCRecorder() for n in [200, 300, 400]: - t = np.linspace(0, 2 * np.pi, n, endpoint=False) - unit_circle = np.exp(1j * t) - unit_circle = np.array([unit_circle.real, unit_circle.imag]) + t = actx.from_numpy(np.linspace(0, 2 * np.pi, 
n, endpoint=False)) + unit_circle = actx.np.stack([actx.np.cos(t), actx.np.sin(t)]) - sigma = np.cos(mode_nr * t) + sigma = actx.np.cos(mode_nr * t) #eigval = 1/(2*mode_nr) eigval = 0.5 @@ -163,21 +166,23 @@ def test_direct_qbx_vs_eigval_with_tgt_deriv( radius = 7 * h centers = unit_circle * (1 - radius) - expansion_radii = np.ones(n) * radius + expansion_radii = actx.from_numpy(np.full(n, radius)) strengths = (sigma * h,) - evt, (result_qbx_dx,) = lpot_dx(actx, + result_qbx_dx = lpot_dx(actx, targets, sources, centers, strengths, - expansion_radii=expansion_radii) - evt, (result_qbx_dy,) = lpot_dy(actx, + expansion_radii=expansion_radii)["result_0"] + result_qbx_dy = lpot_dy(actx, targets, sources, centers, strengths, - expansion_radii=expansion_radii) + expansion_radii=expansion_radii)["result_0"] normals = unit_circle result_qbx = normals[0] * result_qbx_dx + normals[1] * result_qbx_dy - eocrec.add_data_point(h, np.max(np.abs(result_ref - result_qbx))) + error = actx.to_numpy( + actx.np.linalg.norm(result_ref - result_qbx, np.inf)) + eocrec.add_data_point(h, error) if expn_class is not LineTaylorLocalExpansion: logger.info("eoc:\n%s", eocrec) From 4aca058bb4f1fe3a945a19aa3dee421e0b7f4aa6 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 6 Sep 2022 15:53:32 +0300 Subject: [PATCH 25/59] port test_matrixgen to arraycontext --- test/test_matrixgen.py | 108 +++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index c50981b44..c3a67b9f4 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -70,9 +70,9 @@ def _build_subset_indices(actx, ntargets, nsources, factor): rng = np.random.default_rng() if abs(factor - 1.0) > 1.0e-14: tgtindices = rng.choice(tgtindices, - size=int(factor * ntargets), replace=False) + size=int(factor * ntargets), replace=False) srcindices = rng.choice(srcindices, - size=int(factor * nsources), replace=False) + size=int(factor 
* nsources), replace=False) else: rng.shuffle(tgtindices) rng.shuffle(srcindices) @@ -111,60 +111,62 @@ def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False): expn = LineTaylorLocalExpansion(knl, order) from sumpy.qbx import LayerPotential - lpot = LayerPotential(actx, expansion=expn, source_kernels=(knl,), - target_kernels=(base_knl,)) + lpot = LayerPotential( + expansion=expn, + source_kernels=(knl,), + target_kernels=(base_knl,)) from sumpy.qbx import LayerPotentialMatrixGenerator - mat_gen = LayerPotentialMatrixGenerator(actx, - expansion=expn, - source_kernels=(knl,), - target_kernels=(base_knl,)) + mat_gen = LayerPotentialMatrixGenerator( + expansion=expn, + source_kernels=(knl,), + target_kernels=(base_knl,)) from sumpy.qbx import LayerPotentialMatrixSubsetGenerator - blk_gen = LayerPotentialMatrixSubsetGenerator(actx, - expansion=expn, - source_kernels=(knl,), - target_kernels=(base_knl,)) + blk_gen = LayerPotentialMatrixSubsetGenerator( + expansion=expn, + source_kernels=(knl,), + target_kernels=(base_knl,)) for n in [200, 300, 400]: - targets, sources, centers, expansion_radii, sigma = \ - _build_geometry(actx, n, n, mode_nr, target_radius=1.2) + targets, sources, centers, expansion_radii, sigma = ( + _build_geometry(actx, n, n, mode_nr, target_radius=1.2)) h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices, srcindices = _build_subset_indices(actx, - ntargets=n, nsources=n, factor=factor) + tgtindices, srcindices = ( + _build_subset_indices(actx, ntargets=n, nsources=n, factor=factor)) extra_kwargs = {} if lpot_id == 2: from pytools.obj_array import make_obj_array extra_kwargs["dsource_vec"] = ( - actx.from_numpy(make_obj_array(np.ones((ndim, n)))) - ) + actx.from_numpy(make_obj_array(np.ones((ndim, n)))) + ) - _, (result_lpot,) = lpot(actx, - targets=targets, - sources=sources, - centers=centers, - expansion_radii=expansion_radii, - strengths=strengths, **extra_kwargs) + result_lpot = lpot(actx, + targets=targets, + sources=sources, + 
centers=centers, + expansion_radii=expansion_radii, + strengths=strengths, **extra_kwargs)["result_0"] result_lpot = actx.to_numpy(result_lpot) - _, (mat,) = mat_gen(actx, - targets=targets, - sources=sources, - centers=centers, - expansion_radii=expansion_radii, **extra_kwargs) + mat = mat_gen(actx, + targets=targets, + sources=sources, + centers=centers, + expansion_radii=expansion_radii, **extra_kwargs)["result_0"] mat = actx.to_numpy(mat) result_mat = mat @ actx.to_numpy(strengths[0]) - _, (blk,) = blk_gen(actx, - targets=targets, - sources=sources, - centers=centers, - expansion_radii=expansion_radii, - tgtindices=tgtindices, - srcindices=srcindices, **extra_kwargs) + blk = blk_gen(actx, + targets=targets, + sources=sources, + centers=centers, + expansion_radii=expansion_radii, + tgtindices=tgtindices, + srcindices=srcindices, **extra_kwargs)["result_0"] blk = actx.to_numpy(blk) tgtindices = actx.to_numpy(tgtindices) @@ -201,13 +203,15 @@ def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, visualize=False raise ValueError(f"unknown lpot_id: '{lpot_id}'") from sumpy.p2p import P2P - lpot = P2P(actx, [lknl], exclude_self=exclude_self) + lpot = P2P(target_kernels=[lknl], exclude_self=exclude_self) from sumpy.p2p import P2PMatrixGenerator - mat_gen = P2PMatrixGenerator(actx, [lknl], exclude_self=exclude_self) + mat_gen = P2PMatrixGenerator( + target_kernels=[lknl], exclude_self=exclude_self) from sumpy.p2p import P2PMatrixSubsetGenerator - blk_gen = P2PMatrixSubsetGenerator(actx, [lknl], exclude_self=exclude_self) + blk_gen = P2PMatrixSubsetGenerator( + target_kernels=[lknl], exclude_self=exclude_self) for n in [200, 300, 400]: targets, sources, _, _, sigma = ( @@ -215,8 +219,8 @@ def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, visualize=False h = 2 * np.pi / n strengths = (sigma * h,) - tgtindices, srcindices = _build_subset_indices(actx, - ntargets=n, nsources=n, factor=factor) + tgtindices, srcindices = ( + 
_build_subset_indices(actx, ntargets=n, nsources=n, factor=factor)) extra_kwargs = {} if exclude_self: @@ -226,25 +230,25 @@ def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, visualize=False if lpot_id == 2: from pytools.obj_array import make_obj_array extra_kwargs["dsource_vec"] = ( - actx.from_numpy(make_obj_array(np.ones((ndim, n))))) + actx.from_numpy(make_obj_array(np.ones((ndim, n))))) - _, (result_lpot,) = lpot(actx, + result_lpot = lpot(actx, targets=targets, sources=sources, - strength=strengths, **extra_kwargs) + strength=strengths, **extra_kwargs)["result_s0"] result_lpot = actx.to_numpy(result_lpot) - _, (mat,) = mat_gen(actx, - targets=targets, - sources=sources, **extra_kwargs) + mat = mat_gen(actx, + targets=targets, + sources=sources, **extra_kwargs)["result_0"] mat = actx.to_numpy(mat) result_mat = mat @ actx.to_numpy(strengths[0]) - _, (blk,) = blk_gen(actx, - targets=targets, - sources=sources, - tgtindices=tgtindices, - srcindices=srcindices, **extra_kwargs) + blk = blk_gen(actx, + targets=targets, + sources=sources, + tgtindices=tgtindices, + srcindices=srcindices, **extra_kwargs)["result_0"] blk = actx.to_numpy(blk) tgtindices = actx.to_numpy(tgtindices) From 711f93845176fae9014f5b3119042a6308439cf1 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 6 Sep 2022 19:31:22 +0300 Subject: [PATCH 26/59] continue porting fmm to arraycontext --- sumpy/fmm.py | 273 ++++++++++++++++++++++++----------------------- test/test_fmm.py | 2 +- 2 files changed, 140 insertions(+), 135 deletions(-) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 4bdc85f8f..b4b8b6eff 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -34,8 +34,9 @@ from pytools import memoize_method from boxtree.fmm import TreeIndependentDataForWrangler, ExpansionWranglerInterface -from arraycontext import ArrayContext, Array +from arraycontext import Array +from sumpy.array_context import PyOpenCLArrayContext from sumpy import ( P2EFromSingleBox, P2EFromCSR, E2PFromSingleBox, 
E2PFromCSR, @@ -55,16 +56,16 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): """Objects of this type serve as a place to keep the code needed for :class:`SumpyExpansionWrangler`. Since :class:`SumpyExpansionWrangler` - necessarily must have a :class:`pyopencl.CommandQueue`, but this queue + necessarily must have an :class:`arraycontext.ArrayContext`, but it is allowed to be more ephemeral than the code, the code's lifetime is decoupled by storing it in this object. Timing results returned by this wrangler contain the values *wall_elapsed* - which measures elapsed wall time. This requires a command queue with + which measures elapsed wall time. This requires an array context with profiling enabled. """ - def __init__(self, setup_actx: ArrayContext, + def __init__(self, array_context: PyOpenCLArrayContext, multipole_expansion_factory, local_expansion_factory, target_kernels, exclude_self=False, use_rscale=None, @@ -79,7 +80,7 @@ def __init__(self, setup_actx: ArrayContext, :arg strength_usage: passed unchanged to p2l, p2m and p2p. :arg source_kernels: passed unchanged to p2l, p2m and p2p. 
""" - self._setup_actx = setup_actx.clone() + self._setup_actx = array_context self.multipole_expansion_factory = multipole_expansion_factory self.local_expansion_factory = local_expansion_factory @@ -184,8 +185,7 @@ def p2p(self): @memoize_method def opencl_fft_app(self, shape, dtype, inverse): - with cl.CommandQueue(self.cl_context) as queue: - return get_opencl_fft_app(queue, shape, dtype, inverse) + return get_opencl_fft_app(self._setup_actx, shape, dtype, inverse) # }}} @@ -325,13 +325,14 @@ def __init__(self, tree_indep, traversal, dtype, fmm_level_to_order, self.supports_translation_classes = False else: if translation_classes_data is None: - with cl.CommandQueue(self.tree_indep.cl_context) as queue: - from boxtree.translation_classes import TranslationClassesBuilder - translation_classes_builder = TranslationClassesBuilder( - queue.context) - translation_classes_data, _ = translation_classes_builder( - queue, traversal, self.tree, - is_translation_per_level=True) + from boxtree.translation_classes import TranslationClassesBuilder + + actx = tree_indep._setup_actx + translation_classes_builder = TranslationClassesBuilder(actx) + translation_classes_data, _ = translation_classes_builder( + actx, traversal, self.tree, is_translation_per_level=True) + translation_classes_data = actx.freeze(translation_classes_data) + self.supports_translation_classes = True self.translation_classes_data = translation_classes_data @@ -373,9 +374,10 @@ def local_expansions_level_starts(self): @memoize_method def m2l_translation_class_level_start_box_nrs(self): - with cl.CommandQueue(self.tree_indep.cl_context) as queue: - data = self.translation_classes_data - return data.from_sep_siblings_translation_classes_level_starts.get(queue) + actx = self.tree_indep._setup_actx + return actx.to_numpy( + self.translation_classes_data + .from_sep_siblings_translation_classes_level_starts) @memoize_method def m2l_translation_classes_dependent_data_level_starts(self): @@ -389,7 +391,7 @@ def 
order_to_size(order): return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.m2l_translation_class_level_start_box_nrs()) - def multipole_expansion_zeros(self, actx: ArrayContext) -> Array: + def multipole_expansion_zeros(self, actx: PyOpenCLArrayContext) -> Array: """Return an expansions array (which must support addition) capable of holding one multipole or local expansion for every box in the tree. @@ -407,18 +409,25 @@ def local_expansion_zeros(self, actx) -> Array: self.local_expansions_level_starts()[-1], dtype=self.dtype) - def m2l_translation_classes_dependent_data_zeros(self, queue): + def m2l_translation_classes_dependent_data_zeros( + self, actx: PyOpenCLArrayContext): + data_level_starts = ( + self.m2l_translation_classes_dependent_data_level_starts()) + level_start_box_nrs = ( + self.m2l_translation_class_level_start_box_nrs()) + result = [] for level in range(self.tree.nlevels): - expn_start, expn_stop = \ - self.m2l_translation_classes_dependent_data_level_starts()[ - level:level+2] - translation_class_start, translation_class_stop = \ - self.m2l_translation_class_level_start_box_nrs()[level:level+2] - exprs_level = cl.array.zeros(queue, expn_stop - expn_start, - dtype=self.preprocessed_mpole_dtype) - result.append(exprs_level.reshape( - translation_class_stop - translation_class_start, -1)) + expn_start, expn_stop = data_level_starts[level:level + 2] + translation_class_start, translation_class_stop = ( + level_start_box_nrs[level:level + 2]) + + exprs_level = actx.zeros( + expn_stop - expn_start, + dtype=self.preprocessed_mpole_dtype + ).reshape(translation_class_stop - translation_class_start, -1) + result.append(exprs_level) + return result def multipole_expansions_view(self, mpole_exps, level): @@ -456,15 +465,21 @@ def order_to_size(order): return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.tree.level_start_box_nrs) - def m2l_preproc_mpole_expansion_zeros(self, template_ary): + def 
m2l_preproc_mpole_expansion_zeros( + self, actx: PyOpenCLArrayContext, template_ary): + level_starts = self.m2l_preproc_mpole_expansions_level_starts() + result = [] for level in range(self.tree.nlevels): - expn_start, expn_stop = \ - self.m2l_preproc_mpole_expansions_level_starts()[level:level+2] + expn_start, expn_stop = level_starts[level:level+2] box_start, box_stop = self.tree.level_start_box_nrs[level:level+2] - exprs_level = cl.array.zeros(template_ary.queue, expn_stop - expn_start, - dtype=self.preprocessed_mpole_dtype) - result.append(exprs_level.reshape(box_stop - box_start, -1)) + + exprs_level = actx.zeros( + expn_stop - expn_start, + dtype=self.preprocessed_mpole_dtype, + ).reshape(box_stop - box_start, -1) + result.append(exprs_level) + return result def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): @@ -476,7 +491,7 @@ def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): m2l_work_array_level_starts = \ m2l_preproc_mpole_expansions_level_starts - def output_zeros(self, actx: ArrayContext) -> np.ndarray: + def output_zeros(self, actx: PyOpenCLArrayContext) -> np.ndarray: """Return a potentials array (which must support addition) capable of holding a potential value for each target in the tree. 
Note that :func:`drive_fmm` makes no assumptions about *potential* other than @@ -485,9 +500,7 @@ def output_zeros(self, actx: ArrayContext) -> np.ndarray: """ from pytools.obj_array import make_obj_array return make_obj_array([ - actx.zeros( - self.tree.ntargets, - dtype=self.dtype) + actx.zeros(self.tree.ntargets, dtype=self.dtype) for k in self.tree_indep.target_kernels]) def reorder_sources(self, source_array): @@ -508,16 +521,18 @@ def reorder(x): @property @memoize_method def max_nsources_in_one_box(self): - with cl.CommandQueue(self.tree_indep.cl_context) as queue: - return int(pyopencl.array.max(self.tree.box_source_counts_nonchild, - queue).get()) + actx = self.tree_indep._setup_actx + return actx.to_numpy( + actx.np.max(self.tree.box_source_counts_nonchild) + ) @property @memoize_method def max_ntargets_in_one_box(self): - with cl.CommandQueue(self.tree_indep.cl_context) as queue: - return int(pyopencl.array.max(self.tree.box_target_counts_nonchild, - queue).get()) + actx = self.tree_indep._setup_actx + return actx.to_numpy( + actx.np.max(self.tree.box_target_counts_nonchild) + ) # }}} @@ -541,13 +556,14 @@ def box_target_list_kwargs(self): # }}} - def run_opencl_fft(self, queue, input_vec, inverse, wait_for): + def run_opencl_fft(self, actx: PyOpenCLArrayContext, + input_vec, inverse, wait_for): app = self.tree_indep.opencl_fft_app(input_vec.shape, input_vec.dtype, inverse) - return run_opencl_fft(app, queue, input_vec, inverse, wait_for) + return run_opencl_fft(actx, app, input_vec, inverse, wait_for) def form_multipoles(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, level_start_source_box_nrs, source_boxes, src_weight_vecs): mpoles = self.multipole_expansion_zeros(actx) @@ -580,14 +596,13 @@ def form_multipoles(self, return mpoles def coarsen_multipoles(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, level_start_source_parent_box_nrs, source_parent_boxes, mpoles): tree = self.tree events = [] - queue = mpoles.queue # nlevels-1 is 
the last valid level index # nlevels-2 is the last valid level that could have children @@ -614,8 +629,8 @@ def coarsen_multipoles(self, target_level_start_ibox, target_mpoles_view = \ self.multipole_expansions_view(mpoles, target_level) - evt, (mpoles_res,) = m2m( - queue, + mpoles_res = m2m( + actx, src_expansions=source_mpoles_view, src_base_ibox=source_level_start_ibox, tgt_expansions=target_mpoles_view, @@ -629,17 +644,16 @@ def coarsen_multipoles(self, tgt_rscale=self.level_to_rscale(target_level), **self.kernel_extra_kwargs) - events.append(evt) - assert mpoles_res is target_mpoles_view + assert mpoles_res[0] is target_mpoles_view if events: mpoles.add_event(events[-1]) - return (mpoles, SumpyTimingFuture(queue, events)) + return (mpoles, SumpyTimingFuture(actx.queue, events)) def eval_direct(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, target_boxes, source_box_starts, source_box_lists, src_weight_vecs): pot = self.output_zeros(actx) @@ -650,9 +664,8 @@ def eval_direct(self, kwargs.update(self.box_target_list_kwargs()) events = [] - queue = src_weight_vecs[0].queue - evt, pot_res = self.tree_indep.p2p()(queue, + pot_res = self.tree_indep.p2p()(actx, target_boxes=target_boxes, source_box_starts=source_box_starts, source_box_lists=source_box_lists, @@ -661,67 +674,65 @@ def eval_direct(self, max_nsources_in_one_box=self.max_nsources_in_one_box, max_ntargets_in_one_box=self.max_ntargets_in_one_box, **kwargs) - events.append(evt) - for pot_i, pot_res_i in zip(pot, pot_res): + for pot_i, pot_res_i in zip(pot, pot_res[0]): assert pot_i is pot_res_i - pot_i.add_event(evt) - return (pot, SumpyTimingFuture(queue, events)) + return (pot, SumpyTimingFuture(actx.queue, events)) @memoize_method def multipole_to_local_precompute(self): - result = [] - with cl.CommandQueue(self.tree_indep.cl_context) as queue: - m2l_translation_classes_dependent_data = \ - self.m2l_translation_classes_dependent_data_zeros(queue) - for lev in range(self.tree.nlevels): - 
src_rscale = self.level_to_rscale(lev) - order = self.level_orders[lev] - precompute_kernel = \ - self.tree_indep.m2l_translation_class_dependent_data_kernel( - order, order) - - translation_classes_level_start, \ - m2l_translation_classes_dependent_data_view = \ - self.m2l_translation_classes_dependent_data_view( - m2l_translation_classes_dependent_data, lev) - - ntranslation_classes = \ - m2l_translation_classes_dependent_data_view.shape[0] + actx = self.tree_indep._setup_actx - if ntranslation_classes == 0: - result.append(pyopencl.array.empty_like( - m2l_translation_classes_dependent_data_view)) - continue + result = [] + m2l_translation_classes_dependent_data = ( + self.m2l_translation_classes_dependent_data_zeros(actx)) - data = self.translation_classes_data - m2l_translation_vectors = ( - data.from_sep_siblings_translation_class_to_distance_vector) - - evt, _ = precompute_kernel( - queue, - src_rscale=src_rscale, - translation_classes_level_start=translation_classes_level_start, - ntranslation_classes=ntranslation_classes, - m2l_translation_classes_dependent_data=( - m2l_translation_classes_dependent_data_view), - m2l_translation_vectors=m2l_translation_vectors, - ntranslation_vectors=m2l_translation_vectors.shape[1], - **self.kernel_extra_kwargs + for lev in range(self.tree.nlevels): + src_rscale = self.level_to_rscale(lev) + order = self.level_orders[lev] + precompute_kernel = ( + self.tree_indep.m2l_translation_class_dependent_data_kernel( + order, order) ) - if self.tree_indep.m2l_translation.use_fft: - _, m2l_translation_classes_dependent_data_view = \ - self.run_opencl_fft(queue, - m2l_translation_classes_dependent_data_view, - inverse=False, wait_for=[evt]) - result.append(m2l_translation_classes_dependent_data_view) + translation_classes_level_start, \ + m2l_translation_classes_dependent_data_view = \ + self.m2l_translation_classes_dependent_data_view( + m2l_translation_classes_dependent_data, lev) - for lev in range(self.tree.nlevels): - 
result[lev].finish() + ntranslation_classes = \ + m2l_translation_classes_dependent_data_view.shape[0] + + if ntranslation_classes == 0: + result.append(pyopencl.array.empty_like( + m2l_translation_classes_dependent_data_view)) + continue - result = [arr.with_queue(None) for arr in result] + data = self.translation_classes_data + m2l_translation_vectors = ( + data.from_sep_siblings_translation_class_to_distance_vector) + + evt, _ = precompute_kernel( + actx, + src_rscale=src_rscale, + translation_classes_level_start=translation_classes_level_start, + ntranslation_classes=ntranslation_classes, + m2l_translation_classes_dependent_data=( + m2l_translation_classes_dependent_data_view), + m2l_translation_vectors=m2l_translation_vectors, + ntranslation_vectors=m2l_translation_vectors.shape[1], + **self.kernel_extra_kwargs + ) + + if self.tree_indep.m2l_translation.use_fft: + _, m2l_translation_classes_dependent_data_view = \ + self.run_opencl_fft(actx, + m2l_translation_classes_dependent_data_view, + inverse=False, wait_for=[evt]) + result.append(m2l_translation_classes_dependent_data_view) + + result = [actx.freeze(arr) for arr in result] return result def _add_m2l_precompute_kwargs(self, kwargs_for_m2l, @@ -746,17 +757,16 @@ def _add_m2l_precompute_kwargs(self, kwargs_for_m2l, self.translation_classes_data.from_sep_siblings_translation_classes def multipole_to_local(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, mpole_exps): - queue = mpole_exps.queue local_exps = self.local_expansion_zeros(actx) if self.tree_indep.m2l_translation.use_preprocessing: - preprocessed_mpole_exps = \ - self.m2l_preproc_mpole_expansion_zeros(mpole_exps) + preprocessed_mpole_exps = ( + self.m2l_preproc_mpole_expansion_zeros(mpole_exps)) m2l_work_array = self.m2l_work_array_zeros(local_exps) mpole_exps_view_func = self.m2l_preproc_mpole_expansions_view local_exps_view_func = self.m2l_work_array_view @@ -791,7 
+801,7 @@ def multipole_to_local(self, continue evt, _ = preprocess_mpole_kernel( - queue, + actx, src_expansions=source_mpoles_view, preprocessed_src_expansions=preprocessed_mpole_exps[lev], src_rscale=self.level_to_rscale(lev), @@ -802,7 +812,7 @@ def multipole_to_local(self, if self.tree_indep.m2l_translation.use_fft: evt_fft, preprocessed_mpole_exps[lev] = \ - self.run_opencl_fft(queue, + self.run_opencl_fft(actx, preprocessed_mpole_exps[lev], inverse=False, wait_for=wait_for) wait_for.append(get_native_event(evt_fft)) @@ -840,7 +850,7 @@ def multipole_to_local(self, kwargs["m2l_translation_classes_dependent_data"].size == 0: # There is nothing to do for this level continue - evt, _ = m2l(queue, **kwargs, wait_for=wait_for) + evt, _ = m2l(actx, **kwargs, wait_for=wait_for) wait_for.append(evt) translate_evts.append(evt) @@ -863,13 +873,13 @@ def multipole_to_local(self, if self.tree_indep.m2l_translation.use_fft: evt_fft, target_locals_before_postprocessing_view = \ - self.run_opencl_fft(queue, + self.run_opencl_fft(actx, target_locals_before_postprocessing_view, inverse=True, wait_for=wait_for) wait_for.append(get_native_event(evt_fft)) evt, _ = postprocess_local_kernel( - queue, + actx, tgt_expansions=target_locals_view, tgt_expansions_before_postprocessing=( target_locals_before_postprocessing_view), @@ -886,10 +896,10 @@ def multipole_to_local(self, timing_events = preprocess_evts + translate_evts + postprocess_evts - return (local_exps, SumpyTimingFuture(queue, timing_events)) + return (local_exps, SumpyTimingFuture(actx.queue, timing_events)) def eval_multipoles(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, target_boxes_by_source_level, source_boxes_by_level, mpole_exps): pot = self.output_zeros(actx) @@ -897,8 +907,6 @@ def eval_multipoles(self, kwargs.update(self.box_target_list_kwargs()) events = [] - queue = mpole_exps.queue - wait_for = mpole_exps.events for isrc_level, ssn in enumerate(source_boxes_by_level): @@ -911,7 +919,7 @@ def 
eval_multipoles(self, self.multipole_expansions_view(mpole_exps, isrc_level) evt, pot_res = m2p( - queue, + actx, src_expansions=source_mpoles_view, src_base_ibox=source_level_start_ibox, @@ -938,10 +946,10 @@ def eval_multipoles(self, for pot_i in pot: pot_i.add_event(events[-1]) - return (pot, SumpyTimingFuture(queue, events)) + return (pot, SumpyTimingFuture(actx.queue, events)) def form_locals(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, starts, lists, src_weight_vecs): local_exps = self.local_expansion_zeros(actx) @@ -950,7 +958,6 @@ def form_locals(self, kwargs.update(self.box_source_list_kwargs()) events = [] - queue = src_weight_vecs[0].queue for lev in range(self.tree.nlevels): start, stop = \ @@ -964,7 +971,7 @@ def form_locals(self, self.local_expansions_view(local_exps, lev) evt, (result,) = p2l( - queue, + actx, target_boxes=target_or_target_parent_boxes[start:stop], source_box_starts=starts[start:stop+1], source_box_lists=lists, @@ -981,16 +988,15 @@ def form_locals(self, assert result is target_local_exps_view - return (local_exps, SumpyTimingFuture(queue, events)) + return (local_exps, SumpyTimingFuture(actx.queue, events)) def refine_locals(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): events = [] - queue = local_exps.queue for target_lev in range(1, self.tree.nlevels): start, stop = level_start_target_or_target_parent_box_nrs[ @@ -1008,7 +1014,7 @@ def refine_locals(self, target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, target_lev) - evt, (local_exps_res,) = l2l(queue, + evt, (local_exps_res,) = l2l(actx, src_expansions=source_local_exps_view, src_base_ibox=source_level_start_ibox, tgt_expansions=target_local_exps_view, @@ -1028,10 +1034,10 @@ def refine_locals(self, local_exps.add_event(evt) - return (local_exps, 
SumpyTimingFuture(queue, [evt])) + return (local_exps, SumpyTimingFuture(actx.queue, [evt])) def eval_locals(self, - actx: ArrayContext, + actx: PyOpenCLArrayContext, level_start_target_box_nrs, target_boxes, local_exps): pot = self.output_zeros(actx) @@ -1039,7 +1045,6 @@ def eval_locals(self, kwargs.update(self.box_target_list_kwargs()) events = [] - queue = local_exps.queue for lev in range(self.tree.nlevels): start, stop = level_start_target_box_nrs[lev:lev+2] @@ -1052,7 +1057,7 @@ def eval_locals(self, self.local_expansions_view(local_exps, lev) evt, pot_res = l2p( - queue, + actx, src_expansions=source_local_exps_view, src_base_ibox=source_level_start_ibox, @@ -1069,9 +1074,9 @@ def eval_locals(self, for pot_i, pot_res_i in zip(pot, pot_res): assert pot_i is pot_res_i - return (pot, SumpyTimingFuture(queue, events)) + return (pot, SumpyTimingFuture(actx.queue, events)) - def finalize_potentials(self, actx: ArrayContext, potentials): + def finalize_potentials(self, actx: PyOpenCLArrayContext, potentials): return potentials # }}} diff --git a/test/test_fmm.py b/test/test_fmm.py index 1825b92fe..85fb0ee03 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -234,7 +234,7 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): from boxtree.fmm import drive_fmm - pot, = drive_fmm(actx, wrangler, (weights,)) + pot, = drive_fmm(wrangler, (weights,)) from sumpy import P2P p2p = P2P(actx, target_kernels, exclude_self=False) From 276b42aeab510753d961cc2c9a330a8c75ca6265 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 8 Sep 2022 08:51:36 +0300 Subject: [PATCH 27/59] update drive_fmm from boxtree --- sumpy/fmm.py | 12 ++++++++---- test/test_fmm.py | 12 ++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index b4b8b6eff..bccf382f4 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -337,6 +337,10 @@ def __init__(self, tree_indep, traversal, dtype, fmm_level_to_order, self.translation_classes_data = 
translation_classes_data + @property + def _setup_actx(self): + return self.tree_indep._setup_actx + def level_to_rscale(self, level): tree = self.tree order = self.level_orders[level] @@ -374,7 +378,7 @@ def local_expansions_level_starts(self): @memoize_method def m2l_translation_class_level_start_box_nrs(self): - actx = self.tree_indep._setup_actx + actx = self._setup_actx return actx.to_numpy( self.translation_classes_data .from_sep_siblings_translation_classes_level_starts) @@ -521,7 +525,7 @@ def reorder(x): @property @memoize_method def max_nsources_in_one_box(self): - actx = self.tree_indep._setup_actx + actx = self._setup_actx return actx.to_numpy( actx.np.max(self.tree.box_source_counts_nonchild) ) @@ -529,7 +533,7 @@ def max_nsources_in_one_box(self): @property @memoize_method def max_ntargets_in_one_box(self): - actx = self.tree_indep._setup_actx + actx = self._setup_actx return actx.to_numpy( actx.np.max(self.tree.box_target_counts_nonchild) ) @@ -682,7 +686,7 @@ def eval_direct(self, @memoize_method def multipole_to_local_precompute(self): - actx = self.tree_indep._setup_actx + actx = self._setup_actx result = [] m2l_translation_classes_dependent_data = ( diff --git a/test/test_fmm.py b/test/test_fmm.py index 85fb0ee03..79d9f15c8 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -234,7 +234,7 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): from boxtree.fmm import drive_fmm - pot, = drive_fmm(wrangler, (weights,)) + pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P p2p = P2P(actx, target_kernels, exclude_self=False) @@ -412,7 +412,7 @@ def test_unified_single_and_double(actx_factory, visualize=False): from boxtree.fmm import drive_fmm - pot = drive_fmm(wrangler, weights) + pot = drive_fmm(actx, wrangler, weights) results.append(np.array([actx.to_numpy(pot[0]), actx.to_numpy(pot[1])])) ref_pot = results[0] + results[1] @@ -485,7 +485,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory, use_fft, visualize=False) 
from boxtree.fmm import drive_fmm timing_data = {} - pot, = drive_fmm(wrangler, (weights,), timing_data=timing_data) + pot, = drive_fmm(actx, wrangler, (weights,), timing_data=timing_data) logger.info("timing_data:\n%s", timing_data) assert timing_data @@ -539,7 +539,7 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): from boxtree.fmm import drive_fmm - pot, = drive_fmm(wrangler, (weights,)) + pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P p2p = P2P(actx, target_kernels, exclude_self=True) @@ -610,7 +610,7 @@ def test_sumpy_axis_source_derivative(actx_factory, visualize=False): from boxtree.fmm import drive_fmm - pot, = drive_fmm(wrangler, (weights,)) + pot, = drive_fmm(actx, wrangler, (weights,)) pots.append(actx.to_numpy(pot)) rel_err = la.norm(pots[0] + pots[1]) / la.norm(pots[0]) @@ -679,7 +679,7 @@ def test_sumpy_target_point_multiplier(actx_factory, deriv_axes, visualize=False from boxtree.fmm import drive_fmm - pot0, pot1, pot2 = drive_fmm(wrangler, (weights,)) + pot0, pot1, pot2 = drive_fmm(actx, wrangler, (weights,)) pot0, pot1, pot2 = actx.to_numpy(pot0), actx.to_numpy(pot1), actx.to_numpy(pot2) if deriv_axes == (0,): ref_pot = pot1 * actx.to_numpy(sources[0]) + pot2 From 4c1985dd22b2d73a91a8a2496afabee0f35fe30c Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 17 Sep 2022 17:14:22 +0300 Subject: [PATCH 28/59] more work towards getting the fmm working --- sumpy/fmm.py | 170 ++++++++++++++++++++++------------------------- sumpy/p2p.py | 2 +- test/test_fmm.py | 6 +- 3 files changed, 82 insertions(+), 96 deletions(-) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index bccf382f4..c6a317c03 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -26,10 +26,7 @@ .. 
autoclass:: SumpyExpansionWrangler """ - import numpy as np -import pyopencl as cl -import pyopencl.array # noqa from pytools import memoize_method from boxtree.fmm import TreeIndependentDataForWrangler, ExpansionWranglerInterface @@ -45,10 +42,12 @@ E2EFromChildren, E2EFromParent, M2LGenerateTranslationClassesDependentData, M2LPreprocessMultipole, M2LPostprocessLocal) -from sumpy.tools import (to_complex_dtype, AggregateProfilingEvent, - run_opencl_fft, get_opencl_fft_app, get_native_event) +from sumpy.tools import ( + to_complex_dtype, + run_opencl_fft, get_opencl_fft_app, + get_native_event) -from typing import TypeVar, List, Union +from typing import TypeVar, List # {{{ tree-independent data for wrangler @@ -185,7 +184,7 @@ def p2p(self): @memoize_method def opencl_fft_app(self, shape, dtype, inverse): - return get_opencl_fft_app(self._setup_actx, shape, dtype, inverse) + return get_opencl_fft_app(self._setup_actx, shape, dtype, inverse=inverse) # }}} @@ -208,17 +207,19 @@ class UnableToCollectTimingData(UserWarning): class SumpyTimingFuture: - def __init__(self, queue, events: List[Union[cl.Event, EventLike]]): + def __init__(self, queue, events: List[EventLike]): self.queue = queue self.events = events @property - def native_events(self) -> List[cl.Event]: + def native_events(self) -> List[EventLike]: + import pyopencl as cl return [evt if isinstance(evt, cl.Event) else evt.native_event for evt in self.events] @memoize_method def result(self): + import pyopencl as cl from boxtree.timing import TimingResult if not self.queue.properties & cl.command_queue_properties.PROFILING_ENABLE: @@ -231,7 +232,7 @@ def result(self): return TimingResult(wall_elapsed=None) if self.events: - pyopencl.wait_for_events(self.native_events) + cl.wait_for_events(self.native_events) result = 0 for event in self.events: @@ -242,6 +243,7 @@ def result(self): return TimingResult(wall_elapsed=result) def done(self): + import pyopencl as cl return all( 
event.get_info(cl.event_info.COMMAND_EXECUTION_STATUS) == cl.command_execution_status.COMPLETE @@ -362,9 +364,14 @@ def level_to_rscale(self, level): # {{{ data vector utilities + @property + @memoize_method + def tree_level_start_box_nrs(self): + return self._setup_actx.to_numpy(self.tree.level_start_box_nrs) + def _expansions_level_starts(self, order_to_size): return build_csr_level_starts(self.level_orders, order_to_size, - self.tree.level_start_box_nrs) + self.tree_level_start_box_nrs) @memoize_method def multipole_expansions_level_starts(self): @@ -437,7 +444,7 @@ def m2l_translation_classes_dependent_data_zeros( def multipole_expansions_view(self, mpole_exps, level): expn_start, expn_stop = \ self.multipole_expansions_level_starts()[level:level+2] - box_start, box_stop = self.tree.level_start_box_nrs[level:level+2] + box_start, box_stop = self.tree_level_start_box_nrs[level:level+2] return (box_start, mpole_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) @@ -445,7 +452,7 @@ def multipole_expansions_view(self, mpole_exps, level): def local_expansions_view(self, local_exps, level): expn_start, expn_stop = \ self.local_expansions_level_starts()[level:level+2] - box_start, box_stop = self.tree.level_start_box_nrs[level:level+2] + box_start, box_stop = self.tree_level_start_box_nrs[level:level+2] return (box_start, local_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) @@ -467,7 +474,7 @@ def order_to_size(order): return res return build_csr_level_starts(self.level_orders, order_to_size, - level_starts=self.tree.level_start_box_nrs) + level_starts=self.tree_level_start_box_nrs) def m2l_preproc_mpole_expansion_zeros( self, actx: PyOpenCLArrayContext, template_ary): @@ -476,7 +483,7 @@ def m2l_preproc_mpole_expansion_zeros( result = [] for level in range(self.tree.nlevels): expn_start, expn_stop = level_starts[level:level+2] - box_start, box_stop = self.tree.level_start_box_nrs[level:level+2] + box_start, box_stop = 
self.tree_level_start_box_nrs[level:level+2] exprs_level = actx.zeros( expn_stop - expn_start, @@ -487,7 +494,7 @@ def m2l_preproc_mpole_expansion_zeros( return result def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): - box_start, _ = self.tree.level_start_box_nrs[level:level+2] + box_start, _ = self.tree_level_start_box_nrs[level:level+2] return (box_start, mpole_exps[level]) m2l_work_array_view = m2l_preproc_mpole_expansions_view @@ -528,7 +535,7 @@ def max_nsources_in_one_box(self): actx = self._setup_actx return actx.to_numpy( actx.np.max(self.tree.box_source_counts_nonchild) - ) + ).item() @property @memoize_method @@ -536,7 +543,7 @@ def max_ntargets_in_one_box(self): actx = self._setup_actx return actx.to_numpy( actx.np.max(self.tree.box_target_counts_nonchild) - ) + ).item() # }}} @@ -564,13 +571,15 @@ def run_opencl_fft(self, actx: PyOpenCLArrayContext, input_vec, inverse, wait_for): app = self.tree_indep.opencl_fft_app(input_vec.shape, input_vec.dtype, inverse) - return run_opencl_fft(actx, app, input_vec, inverse, wait_for) + return run_opencl_fft( + actx, app, input_vec, inverse=inverse, wait_for=wait_for) def form_multipoles(self, actx: PyOpenCLArrayContext, level_start_source_box_nrs, source_boxes, src_weight_vecs): mpoles = self.multipole_expansion_zeros(actx) + level_start_source_box_nrs = actx.to_numpy(level_start_source_box_nrs) kwargs = self.extra_kwargs.copy() kwargs.update(self.box_source_list_kwargs()) @@ -584,7 +593,7 @@ def form_multipoles(self, level_start_ibox, mpoles_view = self.multipole_expansions_view( mpoles, lev) - evt, (mpoles_res,) = p2m( + mpoles_res = p2m( actx, source_boxes=source_boxes[start:stop], centers=self.tree.box_centers, @@ -593,11 +602,12 @@ def form_multipoles(self, tgt_base_ibox=level_start_ibox, rscale=self.level_to_rscale(lev), - **kwargs) + **kwargs)["tgt_expansions"] assert mpoles_res is mpoles_view - return mpoles + # FIXME: rip out SumpyTimingFuture + return mpoles, SumpyTimingFuture(actx.queue, 
[]) def coarsen_multipoles(self, actx: PyOpenCLArrayContext, @@ -605,8 +615,8 @@ def coarsen_multipoles(self, source_parent_boxes, mpoles): tree = self.tree - - events = [] + level_start_source_parent_box_nrs = ( + actx.to_numpy(level_start_source_parent_box_nrs)) # nlevels-1 is the last valid level index # nlevels-2 is the last valid level that could have children @@ -647,14 +657,11 @@ def coarsen_multipoles(self, src_rscale=self.level_to_rscale(source_level), tgt_rscale=self.level_to_rscale(target_level), - **self.kernel_extra_kwargs) - - assert mpoles_res[0] is target_mpoles_view + **self.kernel_extra_kwargs)["tgt_expansions"] - if events: - mpoles.add_event(events[-1]) + assert mpoles_res is target_mpoles_view - return (mpoles, SumpyTimingFuture(actx.queue, events)) + return mpoles, SumpyTimingFuture(actx.queue, []) def eval_direct(self, actx: PyOpenCLArrayContext, @@ -667,8 +674,6 @@ def eval_direct(self, kwargs.update(self.box_source_list_kwargs()) kwargs.update(self.box_target_list_kwargs()) - events = [] - pot_res = self.tree_indep.p2p()(actx, target_boxes=target_boxes, source_box_starts=source_box_starts, @@ -679,10 +684,10 @@ def eval_direct(self, max_ntargets_in_one_box=self.max_ntargets_in_one_box, **kwargs) - for pot_i, pot_res_i in zip(pot, pot_res[0]): - assert pot_i is pot_res_i + for i in range(pot.size): + assert pot_res[f"result_s{i}"] is pot[i] - return (pot, SumpyTimingFuture(actx.queue, events)) + return pot, SumpyTimingFuture(actx.queue, []) @memoize_method def multipole_to_local_precompute(self): @@ -709,7 +714,7 @@ def multipole_to_local_precompute(self): m2l_translation_classes_dependent_data_view.shape[0] if ntranslation_classes == 0: - result.append(pyopencl.array.empty_like( + result.append(actx.np.zeros_like( m2l_translation_classes_dependent_data_view)) continue @@ -717,7 +722,7 @@ def multipole_to_local_precompute(self): m2l_translation_vectors = ( data.from_sep_siblings_translation_class_to_distance_vector) - evt, _ = 
precompute_kernel( + precompute_kernel( actx, src_rscale=src_rscale, translation_classes_level_start=translation_classes_level_start, @@ -730,10 +735,9 @@ def multipole_to_local_precompute(self): ) if self.tree_indep.m2l_translation.use_fft: - _, m2l_translation_classes_dependent_data_view = \ - self.run_opencl_fft(actx, + self.run_opencl_fft(actx, m2l_translation_classes_dependent_data_view, - inverse=False, wait_for=[evt]) + inverse=False, wait_for=None) result.append(m2l_translation_classes_dependent_data_view) result = [actx.freeze(arr) for arr in result] @@ -767,11 +771,12 @@ def multipole_to_local(self, mpole_exps): local_exps = self.local_expansion_zeros(actx) + level_start_target_box_nrs = actx.to_numpy(level_start_target_box_nrs) if self.tree_indep.m2l_translation.use_preprocessing: preprocessed_mpole_exps = ( - self.m2l_preproc_mpole_expansion_zeros(mpole_exps)) - m2l_work_array = self.m2l_work_array_zeros(local_exps) + self.m2l_preproc_mpole_expansion_zeros(actx, mpole_exps)) + m2l_work_array = self.m2l_work_array_zeros(actx, local_exps) mpole_exps_view_func = self.m2l_preproc_mpole_expansions_view local_exps_view_func = self.m2l_work_array_view else: @@ -804,7 +809,7 @@ def multipole_to_local(self, # There is no M2L happening in this level continue - evt, _ = preprocess_mpole_kernel( + preprocess_mpole_kernel( actx, src_expansions=source_mpoles_view, preprocessed_src_expansions=preprocessed_mpole_exps[lev], @@ -812,15 +817,15 @@ def multipole_to_local(self, wait_for=wait_for, **self.kernel_extra_kwargs ) - wait_for.append(evt) if self.tree_indep.m2l_translation.use_fft: - evt_fft, preprocessed_mpole_exps[lev] = \ + evt, preprocessed_mpole_exps[lev] = \ self.run_opencl_fft(actx, preprocessed_mpole_exps[lev], inverse=False, wait_for=wait_for) - wait_for.append(get_native_event(evt_fft)) - evt = AggregateProfilingEvent([evt, evt_fft]) + wait_for.append(get_native_event(evt)) + else: + evt = None preprocess_evts.append(evt) @@ -854,9 +859,7 @@ def 
multipole_to_local(self, kwargs["m2l_translation_classes_dependent_data"].size == 0: # There is nothing to do for this level continue - evt, _ = m2l(actx, **kwargs, wait_for=wait_for) - wait_for.append(evt) - translate_evts.append(evt) + m2l(actx, **kwargs, wait_for=wait_for) if self.tree_indep.m2l_translation.use_preprocessing: order = self.level_orders[lev] @@ -882,7 +885,7 @@ def multipole_to_local(self, inverse=True, wait_for=wait_for) wait_for.append(get_native_event(evt_fft)) - evt, _ = postprocess_local_kernel( + postprocess_local_kernel( actx, tgt_expansions=target_locals_view, tgt_expansions_before_postprocessing=( @@ -894,9 +897,7 @@ def multipole_to_local(self, ) if self.tree_indep.m2l_translation.use_fft: - postprocess_evts.append(AggregateProfilingEvent([evt_fft, evt])) - else: - postprocess_evts.append(evt) + postprocess_evts.append(evt_fft) timing_events = preprocess_evts + translate_evts + postprocess_evts @@ -910,9 +911,7 @@ def eval_multipoles(self, kwargs = self.kernel_extra_kwargs.copy() kwargs.update(self.box_target_list_kwargs()) - events = [] wait_for = mpole_exps.events - for isrc_level, ssn in enumerate(source_boxes_by_level): if len(target_boxes_by_source_level[isrc_level]) == 0: continue @@ -922,7 +921,7 @@ def eval_multipoles(self, source_level_start_ibox, source_mpoles_view = \ self.multipole_expansions_view(mpole_exps, isrc_level) - evt, pot_res = m2p( + pot_res = m2p( actx, src_expansions=source_mpoles_view, @@ -939,33 +938,26 @@ def eval_multipoles(self, wait_for=wait_for, **kwargs) - events.append(evt) - wait_for = [evt] + for i in range(pot.size): + assert pot_res[f"result_s{i}"] is pot[i] - for pot_i, pot_res_i in zip(pot, pot_res): - assert pot_i is pot_res_i - - if events: - for pot_i in pot: - pot_i.add_event(events[-1]) - - return (pot, SumpyTimingFuture(actx.queue, events)) + return pot, SumpyTimingFuture(actx.queue, []) def form_locals(self, actx: PyOpenCLArrayContext, level_start_target_or_target_parent_box_nrs, 
target_or_target_parent_boxes, starts, lists, src_weight_vecs): local_exps = self.local_expansion_zeros(actx) + level_start_target_or_target_parent_box_nrs = ( + actx.to_numpy(level_start_target_or_target_parent_box_nrs)) kwargs = self.extra_kwargs.copy() kwargs.update(self.box_source_list_kwargs()) - events = [] - for lev in range(self.tree.nlevels): - start, stop = \ - level_start_target_or_target_parent_box_nrs[lev:lev+2] + start, stop = ( + level_start_target_or_target_parent_box_nrs[lev:lev+2]) if start == stop: continue @@ -974,7 +966,7 @@ def form_locals(self, target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, lev) - evt, (result,) = p2l( + result = p2l( actx, target_boxes=target_or_target_parent_boxes[start:stop], source_box_starts=starts[start:stop+1], @@ -987,24 +979,23 @@ def form_locals(self, rscale=self.level_to_rscale(lev), - **kwargs) - events.append(evt) + **kwargs)["tgt_expansions"] assert result is target_local_exps_view - return (local_exps, SumpyTimingFuture(actx.queue, events)) + return local_exps, SumpyTimingFuture(actx.queue, []) def refine_locals(self, actx: PyOpenCLArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): - - events = [] + level_start_target_or_target_parent_box_nrs = ( + actx.to_numpy(level_start_target_or_target_parent_box_nrs)) for target_lev in range(1, self.tree.nlevels): - start, stop = level_start_target_or_target_parent_box_nrs[ - target_lev:target_lev+2] + start, stop = ( + level_start_target_or_target_parent_box_nrs[target_lev:target_lev+2]) if start == stop: continue @@ -1018,7 +1009,7 @@ def refine_locals(self, target_level_start_ibox, target_local_exps_view = \ self.local_expansions_view(local_exps, target_lev) - evt, (local_exps_res,) = l2l(actx, + local_exps_res = l2l(actx, src_expansions=source_local_exps_view, src_base_ibox=source_level_start_ibox, tgt_expansions=target_local_exps_view, @@ -1031,25 +1022,21 @@ def 
refine_locals(self, src_rscale=self.level_to_rscale(source_lev), tgt_rscale=self.level_to_rscale(target_lev), - **self.kernel_extra_kwargs) - events.append(evt) + **self.kernel_extra_kwargs)["tgt_expansions"] assert local_exps_res is target_local_exps_view - local_exps.add_event(evt) - - return (local_exps, SumpyTimingFuture(actx.queue, [evt])) + return local_exps, SumpyTimingFuture(actx.queue, []) def eval_locals(self, actx: PyOpenCLArrayContext, level_start_target_box_nrs, target_boxes, local_exps): pot = self.output_zeros(actx) + level_start_target_box_nrs = actx.to_numpy(level_start_target_box_nrs) kwargs = self.kernel_extra_kwargs.copy() kwargs.update(self.box_target_list_kwargs()) - events = [] - for lev in range(self.tree.nlevels): start, stop = level_start_target_box_nrs[lev:lev+2] if start == stop: @@ -1060,7 +1047,7 @@ def eval_locals(self, source_level_start_ibox, source_local_exps_view = \ self.local_expansions_view(local_exps, lev) - evt, pot_res = l2p( + pot_res = l2p( actx, src_expansions=source_local_exps_view, @@ -1073,12 +1060,11 @@ def eval_locals(self, rscale=self.level_to_rscale(lev), **kwargs) - events.append(evt) - for pot_i, pot_res_i in zip(pot, pot_res): - assert pot_i is pot_res_i + for i in range(pot.size): + assert pot_res[f"result_s{i}"] is pot[i] - return (pot, SumpyTimingFuture(actx.queue, events)) + return pot, SumpyTimingFuture(actx.queue, []) def finalize_potentials(self, actx: PyOpenCLArrayContext, potentials): return potentials diff --git a/sumpy/p2p.py b/sumpy/p2p.py index c2ec45eed..4370033ff 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -675,7 +675,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): knl = self.get_cached_optimized_kernel( max_nsources_in_one_box=max_nsources_in_one_box, max_ntargets_in_one_box=max_ntargets_in_one_box, - is_cpu=actx.queue.dev.type & cl.device_type.CPU) + is_cpu=actx.queue.device.type & cl.device_type.CPU) return actx.call_loopy(knl, **kwargs) diff --git a/test/test_fmm.py 
b/test/test_fmm.py index 79d9f15c8..834e9467e 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -237,11 +237,11 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P - p2p = P2P(actx, target_kernels, exclude_self=False) - evt, (ref_pot,) = p2p(actx, targets, sources, (weights,), **extra_kwargs) + p2p = P2P(target_kernels, exclude_self=False) + ref_pot = p2p(actx, targets, sources, (weights,), **extra_kwargs) pot = actx.to_numpy(pot) - ref_pot = actx.to_numpy(ref_pot) + ref_pot = actx.to_numpy(ref_pot["result_s0"]) rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf) logger.info("order %d -> relative l2 error: %g", order, rel_err) From 2c93c4efc558f909495cb072c6618df4b4229b5e Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 17 Sep 2022 20:34:19 +0300 Subject: [PATCH 29/59] fix up fmm tests --- sumpy/fmm.py | 31 +++++++++++++------------------ sumpy/tools.py | 4 ++-- test/test_fmm.py | 15 ++++++++------- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index c6a317c03..b4c750cfb 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -42,10 +42,7 @@ E2EFromChildren, E2EFromParent, M2LGenerateTranslationClassesDependentData, M2LPreprocessMultipole, M2LPostprocessLocal) -from sumpy.tools import ( - to_complex_dtype, - run_opencl_fft, get_opencl_fft_app, - get_native_event) +from sumpy.tools import (to_complex_dtype, run_opencl_fft, get_opencl_fft_app) from typing import TypeVar, List @@ -571,9 +568,15 @@ def run_opencl_fft(self, actx: PyOpenCLArrayContext, input_vec, inverse, wait_for): app = self.tree_indep.opencl_fft_app(input_vec.shape, input_vec.dtype, inverse) - return run_opencl_fft( + evt, result = run_opencl_fft( actx, app, input_vec, inverse=inverse, wait_for=wait_for) + from sumpy.tools import get_native_event + input_vec.add_event(get_native_event(evt)) + result.add_event(get_native_event(evt)) + + return result + def 
form_multipoles(self, actx: PyOpenCLArrayContext, level_start_source_box_nrs, source_boxes, @@ -735,9 +738,10 @@ def multipole_to_local_precompute(self): ) if self.tree_indep.m2l_translation.use_fft: - self.run_opencl_fft(actx, + m2l_translation_classes_dependent_data_view = ( + self.run_opencl_fft(actx, m2l_translation_classes_dependent_data_view, - inverse=False, wait_for=None) + inverse=False, wait_for=None)) result.append(m2l_translation_classes_dependent_data_view) result = [actx.freeze(arr) for arr in result] @@ -819,15 +823,10 @@ def multipole_to_local(self, ) if self.tree_indep.m2l_translation.use_fft: - evt, preprocessed_mpole_exps[lev] = \ + preprocessed_mpole_exps[lev] = \ self.run_opencl_fft(actx, preprocessed_mpole_exps[lev], inverse=False, wait_for=wait_for) - wait_for.append(get_native_event(evt)) - else: - evt = None - - preprocess_evts.append(evt) order = self.level_orders[lev] m2l = self.tree_indep.m2l(order, order, @@ -879,11 +878,10 @@ def multipole_to_local(self, continue if self.tree_indep.m2l_translation.use_fft: - evt_fft, target_locals_before_postprocessing_view = \ + target_locals_before_postprocessing_view = \ self.run_opencl_fft(actx, target_locals_before_postprocessing_view, inverse=True, wait_for=wait_for) - wait_for.append(get_native_event(evt_fft)) postprocess_local_kernel( actx, @@ -896,9 +894,6 @@ def multipole_to_local(self, **self.kernel_extra_kwargs, ) - if self.tree_indep.m2l_translation.use_fft: - postprocess_evts.append(evt_fft) - timing_events = preprocess_evts + translate_evts + postprocess_evts return (local_exps, SumpyTimingFuture(actx.queue, timing_events)) diff --git a/sumpy/tools.py b/sumpy/tools.py index 74ae92c27..fa0b050fb 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -1195,8 +1195,8 @@ def run_opencl_fft(actx: PyOpenCLArrayContext, app, backend = fft_app if backend == FFTBackend.loopy: - evt, (output_vec,) = app(actx.queue, y=input_vec, wait_for=wait_for) - return (evt, output_vec) + evt, output_vec = 
app(actx.queue, y=input_vec, wait_for=wait_for) + return (evt, output_vec["x"]) elif backend == FFTBackend.pyvkfft: if wait_for is None: wait_for = [] diff --git a/test/test_fmm.py b/test/test_fmm.py index 834e9467e..bd27e8ede 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -319,6 +319,7 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): (weights,) = wrangler.distribute_source_weights((weights,), None) local_result, _ = wrangler.form_locals( + actx, trav.level_start_target_or_target_parent_box_nrs, trav.target_or_target_parent_boxes, trav.from_sep_bigger_starts, @@ -399,7 +400,7 @@ def test_unified_single_and_double(actx_factory, visualize=False): for source_kernels, strength_usage in zip(source_kernel_vecs, strength_usages): source_extra_kwargs = {} if deriv_knl in source_kernels: - source_extra_kwargs["dir_vec"] = dir_vec + source_extra_kwargs["dir_vec"] = actx.from_numpy(dir_vec) tree_indep = SumpyTreeIndependentDataForWrangler( actx, partial(mpole_expn_class, knl), @@ -522,7 +523,7 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) target_to_source = np.arange(tree.ntargets, dtype=np.int32) - self_extra_kwargs = {"target_to_source": target_to_source} + self_extra_kwargs = {"target_to_source": actx.from_numpy(target_to_source)} target_kernels = [knl] @@ -542,11 +543,11 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): pot, = drive_fmm(actx, wrangler, (weights,)) from sumpy import P2P - p2p = P2P(actx, target_kernels, exclude_self=True) - evt, (ref_pot,) = p2p(actx, sources, sources, (weights,), **self_extra_kwargs) + p2p = P2P(target_kernels, exclude_self=True) + ref_pot = p2p(actx, sources, sources, (weights,), **self_extra_kwargs) pot = actx.to_numpy(pot) - ref_pot = actx.to_numpy(ref_pot) + ref_pot = actx.to_numpy(ref_pot["result_s0"]) rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot) logger.info("order %d -> relative l2 error: %g", order, 
rel_err) @@ -588,7 +589,7 @@ def test_sumpy_axis_source_derivative(actx_factory, visualize=False): weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) target_to_source = np.arange(tree.ntargets, dtype=np.int32) - self_extra_kwargs = {"target_to_source": target_to_source} + self_extra_kwargs = {"target_to_source": actx.from_numpy(target_to_source)} from sumpy.kernel import AxisTargetDerivative, AxisSourceDerivative @@ -656,7 +657,7 @@ def test_sumpy_target_point_multiplier(actx_factory, deriv_axes, visualize=False weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) target_to_source = np.arange(tree.ntargets, dtype=np.int32) - self_extra_kwargs = {"target_to_source": target_to_source} + self_extra_kwargs = {"target_to_source": actx.from_numpy(target_to_source)} from sumpy.kernel import TargetPointMultiplier, AxisTargetDerivative From ddaa4ad9d5ed475cc4a8b1a8db636f92b3f12998 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 17 Sep 2022 21:13:39 +0300 Subject: [PATCH 30/59] port distributed to arraycontext --- sumpy/distributed.py | 66 ++++++++++++++++++++++------------------ test/test_distributed.py | 56 +++++++++++++++++++--------------- 2 files changed, 68 insertions(+), 54 deletions(-) diff --git a/sumpy/distributed.py b/sumpy/distributed.py index 1707e5e6a..ee257b7e2 100644 --- a/sumpy/distributed.py +++ b/sumpy/distributed.py @@ -20,64 +20,71 @@ THE SOFTWARE. 
""" -from boxtree.distributed.calculation import DistributedExpansionWrangler +from boxtree.distributed.calculation import DistributedExpansionWranglerMixin + from sumpy.fmm import SumpyExpansionWrangler -import pyopencl as cl +from sumpy.array_context import PyOpenCLArrayContext class DistributedSumpyExpansionWrangler( - DistributedExpansionWrangler, SumpyExpansionWrangler): + DistributedExpansionWranglerMixin, SumpyExpansionWrangler): def __init__( - self, context, comm, tree_indep, local_traversal, global_traversal, + self, actx: PyOpenCLArrayContext, + comm, tree_indep, local_traversal, global_traversal, dtype, fmm_level_to_order, communicate_mpoles_via_allreduce=False, **kwarg): - DistributedExpansionWrangler.__init__( - self, context, comm, global_traversal, True, - communicate_mpoles_via_allreduce=communicate_mpoles_via_allreduce) SumpyExpansionWrangler.__init__( self, tree_indep, local_traversal, dtype, fmm_level_to_order, **kwarg) - def distribute_source_weights(self, src_weight_vecs, src_idx_all_ranks): - src_weight_vecs_host = [src_weight.get() for src_weight in src_weight_vecs] + self.comm = comm + self.traversal_in_device_memory = True + self.global_traversal = global_traversal + self.communicate_mpoles_via_allreduce = communicate_mpoles_via_allreduce + + def distribute_source_weights(self, + actx: PyOpenCLArrayContext, src_weight_vecs, src_idx_all_ranks): + src_weight_vecs_host = [ + actx.to_numpy(src_weight) for src_weight in src_weight_vecs + ] local_src_weight_vecs_host = super().distribute_source_weights( - src_weight_vecs_host, src_idx_all_ranks) + actx, src_weight_vecs_host, src_idx_all_ranks) local_src_weight_vecs_device = [ - cl.array.to_device(src_weight.queue, local_src_weight) - for local_src_weight, src_weight in - zip(local_src_weight_vecs_host, src_weight_vecs)] + actx.from_numpy(local_src_weight) + for local_src_weight in local_src_weight_vecs_host] return local_src_weight_vecs_device - def gather_potential_results(self, potentials, 
tgt_idx_all_ranks): - mpi_rank = self.comm.Get_rank() - - potentials_host_vec = [potentials_dev.get() for potentials_dev in potentials] + def gather_potential_results(self, + actx: PyOpenCLArrayContext, potentials, tgt_idx_all_ranks): + potentials_host_vec = [ + actx.to_numpy(potentials_dev) for potentials_dev in potentials + ] gathered_potentials_host_vec = [] for potentials_host in potentials_host_vec: gathered_potentials_host_vec.append( - super().gather_potential_results(potentials_host, tgt_idx_all_ranks)) + super().gather_potential_results( + actx, potentials_host, tgt_idx_all_ranks)) - if mpi_rank == 0: + if self.is_mpi_root: from pytools.obj_array import make_obj_array return make_obj_array([ - cl.array.to_device(potentials_dev.queue, gathered_potentials_host) - for gathered_potentials_host, potentials_dev in - zip(gathered_potentials_host_vec, potentials)]) + actx.from_numpy(gathered_potentials_host) + for gathered_potentials_host in gathered_potentials_host_vec + ]) else: return None def reorder_sources(self, source_array): - if self.comm.Get_rank() == 0: - return source_array.with_queue(source_array.queue)[ - self.global_traversal.tree.user_source_ids] + if self.is_mpi_root: + return source_array[self.global_traversal.tree.user_source_ids] else: return source_array def reorder_potentials(self, potentials): - if self.comm.Get_rank() == 0: + if self.is_mpi_root: from pytools.obj_array import obj_array_vectorize import numpy as np assert ( @@ -91,8 +98,9 @@ def reorder(x): else: return None - def communicate_mpoles(self, mpole_exps, return_stats=False): - mpole_exps_host = mpole_exps.get() - stats = super().communicate_mpoles(mpole_exps_host, return_stats) + def communicate_mpoles(self, + actx: PyOpenCLArrayContext, mpole_exps, return_stats=False): + mpole_exps_host = actx.to_numpy(mpole_exps) + stats = super().communicate_mpoles(actx, mpole_exps_host, return_stats) mpole_exps[:] = mpole_exps_host return stats diff --git a/test/test_distributed.py 
b/test/test_distributed.py index dd21b224f..26313fa96 100644 --- a/test/test_distributed.py +++ b/test/test_distributed.py @@ -21,10 +21,21 @@ """ import os +import pytest from functools import partial -import pyopencl as cl + import numpy as np -import pytest + +from arraycontext import pytest_generate_tests_for_array_contexts +from sumpy.array_context import ( # noqa: F401 + PytestPyOpenCLArrayContextFactory, _acf) + +import logging +logger = logging.getLogger(__name__) + +pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + PytestPyOpenCLArrayContextFactory, + ]) # Note: Do not import mpi4py.MPI object at the module level, because OpenMPI does not # support recursive invocations. @@ -51,14 +62,13 @@ def _test_against_single_rank( set_cache_dir(mpi_rank) # Configure array context - cl_context = cl.create_some_context() - queue = cl.CommandQueue(cl_context) + actx = _acf() def fmm_level_to_order(base_kernel, kernel_arg_set, tree, level): return max(level, 3) from boxtree.traversal import FMMTraversalBuilder - traversal_builder = FMMTraversalBuilder(cl_context, well_sep_is_n_away=2) + traversal_builder = FMMTraversalBuilder(actx, well_sep_is_n_away=2) from sumpy.kernel import LaplaceKernel from sumpy.expansion import DefaultExpansionFactory @@ -72,34 +82,30 @@ def fmm_level_to_order(base_kernel, kernel_arg_set, tree, level): from sumpy.fmm import SumpyTreeIndependentDataForWrangler tree_indep = SumpyTreeIndependentDataForWrangler( - cl_context, multipole_expansion_factory, local_expansion_factory, [kernel]) + actx, multipole_expansion_factory, local_expansion_factory, [kernel]) global_tree_dev = None - sources_weights = cl.array.empty(queue, 0, dtype=dtype) + sources_weights = actx.empty(0, dtype=dtype) if mpi_rank == 0: # Generate random particles and source weights from boxtree.tools import make_normal_particle_array as p_normal - sources = p_normal(queue, nsources, dims, dtype, seed=15) - targets = p_normal(queue, ntargets, dims, dtype, seed=18) 
+ sources = p_normal(actx, nsources, dims, dtype, seed=15) + targets = p_normal(actx, ntargets, dims, dtype, seed=18) # FIXME: Use arraycontext instead of raw PyOpenCL arrays - from pyopencl.clrandom import PhiloxGenerator - rng = PhiloxGenerator(cl_context, seed=20) - sources_weights = rng.uniform(queue, nsources, dtype=np.float64) - - rng = PhiloxGenerator(cl_context, seed=22) - target_radii = rng.uniform( - queue, ntargets, a=0, b=0.05, dtype=np.float64) + rng = np.random.default_rng(20) + sources_weights = actx.from_numpy(rng.random(nsources, dtype=np.float64)) + target_radii = actx.from_numpy(0.05 * rng.random(ntargets, dtype=np.float64)) # Build the tree and interaction lists from boxtree import TreeBuilder - tb = TreeBuilder(cl_context) + tb = TreeBuilder(actx) global_tree_dev, _ = tb( - queue, sources, targets=targets, target_radii=target_radii, + actx, sources, targets=targets, target_radii=target_radii, stick_out_factor=0.25, max_particles_in_box=30, debug=True) - global_trav_dev, _ = traversal_builder(queue, global_tree_dev, debug=True) + global_trav_dev, _ = traversal_builder(actx, global_tree_dev, debug=True) from sumpy.fmm import SumpyExpansionWrangler wrangler = SumpyExpansionWrangler(tree_indep, global_trav_dev, dtype, @@ -107,32 +113,32 @@ def fmm_level_to_order(base_kernel, kernel_arg_set, tree, level): # Compute FMM with one MPI rank from boxtree.fmm import drive_fmm - shmem_potential = drive_fmm(wrangler, [sources_weights]) + shmem_potential = drive_fmm(actx, wrangler, [sources_weights]) # Compute FMM using the distributed implementation def wrangler_factory(local_traversal, global_traversal): from sumpy.distributed import DistributedSumpyExpansionWrangler return DistributedSumpyExpansionWrangler( - cl_context, comm, tree_indep, local_traversal, global_traversal, dtype, + actx, comm, tree_indep, local_traversal, global_traversal, dtype, fmm_level_to_order, communicate_mpoles_via_allreduce=communicate_mpoles_via_allreduce) from 
boxtree.distributed import DistributedFMMRunner distribued_fmm_info = DistributedFMMRunner( - queue, global_tree_dev, traversal_builder, wrangler_factory, comm=comm) + actx, global_tree_dev, traversal_builder, wrangler_factory, comm=comm) timing_data = {} distributed_potential = distribued_fmm_info.drive_dfmm( - [sources_weights], timing_data=timing_data) + actx, [sources_weights], timing_data=timing_data) assert timing_data if mpi_rank == 0: assert shmem_potential.shape == (1,) assert distributed_potential.shape == (1,) - shmem_potential = shmem_potential[0].get() - distributed_potential = distributed_potential[0].get() + shmem_potential = actx.to_numpy(shmem_potential[0]) + distributed_potential = actx.to_numpy(distributed_potential[0]) error = (np.linalg.norm(distributed_potential - shmem_potential, ord=np.inf) / np.linalg.norm(shmem_potential, ord=np.inf)) From be9c909de79a2ead45641069dc11e0effdeb0705 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sun, 18 Sep 2022 09:33:23 +0300 Subject: [PATCH 31/59] add missing actx --- test/test_fmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index bd27e8ede..803ac3c7a 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -316,7 +316,7 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): kernel_extra_kwargs=extra_kwargs) weights = wrangler.reorder_sources(weights) - (weights,) = wrangler.distribute_source_weights((weights,), None) + (weights,) = wrangler.distribute_source_weights(actx, (weights,), None) local_result, _ = wrangler.form_locals( actx, From fd7e61f6ce32d2b62f644ae8edaaf0aa6b9db140 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Wed, 21 Sep 2022 17:34:25 +0300 Subject: [PATCH 32/59] fix kernel return values --- examples/curve-pot.py | 8 ++++---- sumpy/e2e.py | 26 +++++++++++++++++++------- sumpy/e2p.py | 11 +++++++++-- sumpy/fmm.py | 8 ++++---- sumpy/p2e.py | 4 +++- sumpy/p2p.py | 16 ++++++++++++---- sumpy/qbx.py | 14 +++++++++++--- 
sumpy/toys.py | 12 ++++++------ test/test_fmm.py | 8 ++++---- test/test_kernels.py | 38 +++++++++++++++++++------------------- test/test_matrixgen.py | 24 ++++++++++++------------ test/test_qbx.py | 12 ++++++------ 12 files changed, 109 insertions(+), 72 deletions(-) diff --git a/examples/curve-pot.py b/examples/curve-pot.py index 3ac66a276..a95a0a0c1 100644 --- a/examples/curve-pot.py +++ b/examples/curve-pot.py @@ -206,11 +206,11 @@ def apply_lpot(x): density = np.cos(mode_nr*2*np.pi*native_t).astype(np.complex128) strengths = actx.from_numpy(native_curve.speed * native_weights * density) - result = p2p(actx, + result, = p2p(actx, sources, targets, [strengths], **volpot_kwargs) - vol_pot = actx.to_numpy(result["result_s0"]) + vol_pot = actx.to_numpy(result) ovsmp_targets = actx.from_numpy(ovsmp_curve.pos) @@ -218,14 +218,14 @@ def apply_lpot(x): ovsmp_strengths = actx.from_numpy( ovsmp_curve.speed * ovsmp_weights * ovsmp_density) - result = lpot(actx, + result, = lpot(actx, targets, ovsmp_targets, actx.from_numpy(centers), [ovsmp_strengths], expansion_radii=actx.from_numpy(np.ones(centers.shape[1])), **lpot_kwargs) - curve_pot = actx.to_numpy(result["result_0"]) + curve_pot = actx.to_numpy(result) # }}} diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 142f01dce..58cc01352 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -260,12 +260,13 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) knl = self.get_cached_optimized_kernel() - return actx.call_loopy( + result = actx.call_loopy( knl, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) + return result["tgt_expansions"] # }}} @@ -500,13 +501,15 @@ def __call__(self, actx, **kwargs): src_expansions = kwargs.pop("src_expansions") knl = self.get_cached_optimized_kernel(result_dtype=src_expansions.dtype) - return actx.call_loopy( + result = actx.call_loopy( knl, src_expansions=src_expansions, centers=centers, src_rscale=src_rscale, 
tgt_rscale=tgt_rscale, **kwargs) + return result["tgt_expansions"] + # }}} @@ -608,7 +611,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): result_dtype = m2l_translation_classes_dependent_data.dtype knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - return actx.call_loopy( + result = actx.call_loopy( knl, src_rscale=src_rscale, m2l_translation_vectors=m2l_translation_vectors, @@ -616,6 +619,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): m2l_translation_classes_dependent_data), **kwargs) + return result["tgt_expansions"] # }}} @@ -693,11 +697,13 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): result_dtype = preprocessed_src_expansions.dtype knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - return actx.call_loopy( + result = actx.call_loopy( knl, preprocessed_src_expansions=preprocessed_src_expansions, **kwargs) + return result["tgt_expansions"] + # }}} @@ -779,11 +785,13 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): result_dtype = tgt_expansions.dtype knl = self.get_cached_optimized_kernel(result_dtype=result_dtype) - return actx.call_loopy( + result = actx.call_loopy( knl, tgt_expansions=tgt_expansions, **kwargs) + return result["tgt_expansions"] + # }}} @@ -895,12 +903,14 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) knl = self.get_cached_optimized_kernel() - return actx.call_loopy( + result = actx.call_loopy( knl, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) + return result["tgt_expansions"] + # }}} @@ -998,12 +1008,14 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) knl = self.get_cached_optimized_kernel() - return actx.call_loopy( + result = actx.call_loopy( knl, centers=centers, src_rscale=src_rscale, tgt_rscale=tgt_rscale, **kwargs) + return result["tgt_expansions"] + # }}} # vim: foldmethod=marker 
diff --git a/sumpy/e2p.py b/sumpy/e2p.py index c3ad116c1..609a5e3e2 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -23,6 +23,8 @@ import numpy as np import loopy as lp +from pytools.obj_array import make_obj_array + from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program from sumpy.tools import KernelCacheMixin @@ -215,10 +217,13 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): rscale = centers.dtype.type(kwargs.pop("rscale")) knl = self.get_cached_optimized_kernel() - return actx.call_loopy( + result = actx.call_loopy( knl, centers=centers, rscale=rscale, **kwargs) + # FIXME: cleaner way to get the names out? + return make_obj_array([result[f"result_s{i}"] for i in range()]) + # }}} @@ -319,10 +324,12 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): rscale = centers.dtype.type(kwargs.pop("rscale")) knl = self.get_cached_optimized_kernel() - return actx.call_loopy( + result = actx.call_loopy( knl, centers=centers, rscale=rscale, **kwargs) + return make_obj_array([result[f"result_s{i}"] for i in range()]) + # }}} # vim: foldmethod=marker diff --git a/sumpy/fmm.py b/sumpy/fmm.py index b4c750cfb..59c4807cc 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -605,7 +605,7 @@ def form_multipoles(self, tgt_base_ibox=level_start_ibox, rscale=self.level_to_rscale(lev), - **kwargs)["tgt_expansions"] + **kwargs) assert mpoles_res is mpoles_view @@ -660,7 +660,7 @@ def coarsen_multipoles(self, src_rscale=self.level_to_rscale(source_level), tgt_rscale=self.level_to_rscale(target_level), - **self.kernel_extra_kwargs)["tgt_expansions"] + **self.kernel_extra_kwargs) assert mpoles_res is target_mpoles_view @@ -974,7 +974,7 @@ def form_locals(self, rscale=self.level_to_rscale(lev), - **kwargs)["tgt_expansions"] + **kwargs) assert result is target_local_exps_view @@ -1017,7 +1017,7 @@ def refine_locals(self, src_rscale=self.level_to_rscale(source_lev), tgt_rscale=self.level_to_rscale(target_lev), - **self.kernel_extra_kwargs)["tgt_expansions"] + 
**self.kernel_extra_kwargs) assert local_exps_res is target_local_exps_view diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 8a0e54a2a..b67bbd826 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -148,11 +148,13 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) - return actx.call_loopy( + result = actx.call_loopy( knl, sources=sources, centers=centers, rscale=rscale, **kwargs) + return result["tgt_expansions"] + # }}} diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 4370033ff..a12a55922 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -26,6 +26,8 @@ import numpy as np import loopy as lp +from pytools.obj_array import make_obj_array + from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program from sumpy.tools import KernelComputation, KernelCacheMixin, is_obj_array_like @@ -257,11 +259,13 @@ def __call__(self, actx: PyOpenCLArrayContext, targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - return actx.call_loopy( + result = actx.call_loopy( knl, sources=sources, targets=targets, strength=strength, **kwargs) + return make_obj_array([result[f"result_s{i}"] for i in range()]) + # }}} @@ -319,7 +323,8 @@ def __call__(self, actx: PyOpenCLArrayContext, targets, sources, **kwargs): targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - return actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) + result = actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) + return make_obj_array([result[f"result_s{i}"] for i in range()]) # }}} @@ -429,13 +434,15 @@ def __call__(self, actx: PyOpenCLArrayContext, targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - return actx.call_loopy( + result = actx.call_loopy( knl, targets=targets, sources=sources, tgtindices=tgtindices, srcindices=srcindices, **kwargs) + 
return make_obj_array([result[f"result_s{i}"] for i in range()]) + # }}} @@ -677,7 +684,8 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): max_ntargets_in_one_box=max_ntargets_in_one_box, is_cpu=actx.queue.device.type & cl.device_type.CPU) - return actx.call_loopy(knl, **kwargs) + result = actx.call_loopy(knl, **kwargs) + return make_obj_array([result[f"result_s{i}"] for i in range()]) # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index e44631e89..a20a85401 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -29,6 +29,8 @@ import loopy as lp from pytools import memoize_method +from pytools.obj_array import make_obj_array + from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program, is_cl_cpu from sumpy.tools import KernelComputation, KernelCacheMixin, is_obj_array_like @@ -304,11 +306,13 @@ def __call__(self, actx: PyOpenCLArrayContext, for i, dens in enumerate(strengths): kwargs[f"strength_{i}"] = dens - return actx.call_loopy( + result = actx.call_loopy( knl, sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) + return make_obj_array([result[f"result_s{i}"] for i in range()]) + # }}} @@ -373,11 +377,13 @@ def __call__(self, actx: PyOpenCLArrayContext, sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) - return actx.call_loopy( + result = actx.call_loopy( knl, sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) + return make_obj_array([result[f"result_{i}"] for i in range()]) + # }}} @@ -493,7 +499,7 @@ def __call__(self, actx: PyOpenCLArrayContext, sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) - return actx.call_loopy( + result = actx.call_loopy( knl, sources=sources, targets=targets, @@ -502,6 +508,8 @@ def __call__(self, actx: PyOpenCLArrayContext, tgtindices=tgtindices, srcindices=srcindices, **kwargs) + return make_obj_array([result[f"result_{i}"] for i in 
range()]) + # }}} # }}} diff --git a/sumpy/toys.py b/sumpy/toys.py index 6d7757e04..bf20242e5 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -211,7 +211,7 @@ def _p2e(actx: PyOpenCLArrayContext, nboxes=1, tgt_base_ibox=0, - **toy_ctx.extra_source_and_kernel_kwargs)["tgt_expansions"] + **toy_ctx.extra_source_and_kernel_kwargs) return expn_class( toy_ctx, center, rscale, order, actx.to_numpy(coeffs[0]), @@ -233,7 +233,7 @@ def _e2p(actx: PyOpenCLArrayContext, psource, targets, e2p): coeffs = actx.from_numpy(np.array([psource.coeffs])) from pytools.obj_array import make_obj_array - pot = e2p( + pot, = e2p( actx, src_expansions=coeffs, src_base_ibox=0, @@ -244,7 +244,7 @@ def _e2p(actx: PyOpenCLArrayContext, psource, targets, e2p): rscale=psource.rscale, targets=actx.from_numpy(make_obj_array(targets)), - **toy_ctx.extra_kernel_kwargs)["result_s0"] + **toy_ctx.extra_kernel_kwargs) return actx.to_numpy(pot) @@ -282,7 +282,7 @@ def _e2e(actx: PyOpenCLArrayContext, src_rscale=psource.rscale, tgt_rscale=to_rscale, - **toy_ctx.extra_kernel_kwargs)["tgt_expansions"] + **toy_ctx.extra_kernel_kwargs) return expn_class( toy_ctx, to_center, to_rscale, to_order, actx.to_numpy(to_coeffs[1]), @@ -410,14 +410,14 @@ def __init__(self, toy_ctx, points, weights, center=None): self._center = center def eval(self, actx: PyOpenCLArrayContext, targets): - potential = self.toy_ctx.get_p2p()( + potential, = self.toy_ctx.get_p2p()( actx, actx.from_numpy(targets), actx.from_numpy(self.points), [actx.from_numpy(self.weights)], **self.toy_ctx.extra_source_and_kernel_kwargs) - return actx.to_numpy(potential["result_s0"]) + return actx.to_numpy(potential) @property def center(self): diff --git a/test/test_fmm.py b/test/test_fmm.py index 803ac3c7a..f5e0c8d3e 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -238,10 +238,10 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): from sumpy import P2P p2p = P2P(target_kernels, exclude_self=False) - ref_pot = p2p(actx, targets, sources, 
(weights,), **extra_kwargs) + ref_pot, = p2p(actx, targets, sources, (weights,), **extra_kwargs) pot = actx.to_numpy(pot) - ref_pot = actx.to_numpy(ref_pot["result_s0"]) + ref_pot = actx.to_numpy(ref_pot) rel_err = la.norm(pot - ref_pot, np.inf) / la.norm(ref_pot, np.inf) logger.info("order %d -> relative l2 error: %g", order, rel_err) @@ -544,10 +544,10 @@ def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): from sumpy import P2P p2p = P2P(target_kernels, exclude_self=True) - ref_pot = p2p(actx, sources, sources, (weights,), **self_extra_kwargs) + ref_pot, = p2p(actx, sources, sources, (weights,), **self_extra_kwargs) pot = actx.to_numpy(pot) - ref_pot = actx.to_numpy(ref_pot["result_s0"]) + ref_pot = actx.to_numpy(ref_pot) rel_err = la.norm(pot - ref_pot) / la.norm(ref_pot) logger.info("order %d -> relative l2 error: %g", order, rel_err) diff --git a/test/test_kernels.py b/test/test_kernels.py index 4b621faea..afef0b7fb 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -84,14 +84,14 @@ def test_p2p(actx_factory, exclude_self): extra_kwargs["target_to_source"] = ( actx.from_numpy(np.arange(n, dtype=np.int32))) - result = knl( + result, = knl( actx, actx.from_numpy(targets), actx.from_numpy(sources), [actx.from_numpy(strengths)], **extra_kwargs) - potential = actx.to_numpy(result["result_s0"]) + potential = actx.to_numpy(result) potential_ref = np.empty_like(potential) targets = targets.T @@ -196,7 +196,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): tgt_base_ibox=0, rscale=rscale, dir_vec=dir_vec, - **extra_kwargs)["tgt_expansions"] + **extra_kwargs) actual_result = actx.to_numpy(mpoles) # apply p2e separately @@ -219,7 +219,7 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): nboxes=1, tgt_base_ibox=0, rscale=rscale, - **extra_source_kwargs)["tgt_expansions"] + **extra_source_kwargs) expected_result += actx.to_numpy(mpoles) norm = la.norm(actual_result - expected_result) / la.norm(expected_result) @@ -354,7 
+354,7 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative nboxes=1, tgt_base_ibox=0, rscale=rscale, - **extra_source_kwargs)["tgt_expansions"] + **extra_source_kwargs) # }}} @@ -366,7 +366,7 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative box_target_counts_nonchild = actx.from_numpy( np.array([ntargets], dtype=np.int32)) - result = e2p( + pot, grad_x = e2p( actx, src_expansions=mpoles, src_base_ibox=0, @@ -378,20 +378,20 @@ def test_p2e2p(actx_factory, base_knl, expn_class, order, with_source_derivative rscale=rscale, **extra_kwargs) - pot = actx.to_numpy(result["result_s0"]) - grad_x = actx.to_numpy(result["result_s1"]) + pot = actx.to_numpy(pot) + grad_x = actx.to_numpy(grad_x) # }}} # {{{ compute (direct) reference solution - result = p2p( + pot_direct, grad_x_direct = p2p( actx, targets, sources, (strengths,), **extra_source_kwargs) - pot_direct = actx.to_numpy(result["result_s0"]) - grad_x_direct = actx.to_numpy(result["result_s1"]) + pot_direct = actx.to_numpy(pot_direct) + grad_x_direct = actx.to_numpy(grad_x_direct) err_pot = la.norm((pot - pot_direct)/res**2) err_grad_x = la.norm((grad_x - grad_x_direct)/res**2) @@ -559,7 +559,7 @@ def eval_at(e2p, source_box_nr, rscale): e2p_box_target_counts_nonchild = actx.zeros(4, dtype=np.int32) e2p_box_target_counts_nonchild[source_box_nr] = ntargets - pot = e2p( + pot, = e2p( actx, src_expansions=mpoles, src_base_ibox=0, @@ -572,7 +572,7 @@ def eval_at(e2p, source_box_nr, rscale): **extra_kwargs ) - return actx.to_numpy(pot["result_s0"]) + return actx.to_numpy(pot) m2l_factory = NonFFTM2LTranslationClassFactory() m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)() @@ -595,11 +595,11 @@ def eval_at(e2p, source_box_nr, rscale): # {{{ compute (direct) reference solution - pot_direct = p2p( + pot_direct, = p2p( actx, targets, sources, (strengths,), **extra_kwargs) - pot_direct = actx.to_numpy(pot_direct["result_s0"]) + 
pot_direct = actx.to_numpy(pot_direct) # }}} @@ -627,7 +627,7 @@ def eval_at(e2p, source_box_nr, rscale): nboxes=nboxes, rscale=m1_rscale, tgt_base_ibox=0, - **extra_kwargs)["tgt_expansions"] + **extra_kwargs) # }}} @@ -664,7 +664,7 @@ def eval_at(e2p, source_box_nr, rscale): src_rscale=m1_rscale, tgt_rscale=m2_rscale, - **extra_kwargs)["tgt_expansions"] + **extra_kwargs) # }}} @@ -698,7 +698,7 @@ def eval_at(e2p, source_box_nr, rscale): src_rscale=m2_rscale, tgt_rscale=l1_rscale, - **extra_kwargs)["tgt_expansions"] + **extra_kwargs) # }}} @@ -732,7 +732,7 @@ def eval_at(e2p, source_box_nr, rscale): src_rscale=l1_rscale, tgt_rscale=l2_rscale, - **extra_kwargs)["tgt_expansions"] + **extra_kwargs) # }}} diff --git a/test/test_matrixgen.py b/test/test_matrixgen.py index c3a67b9f4..08d6e12d5 100644 --- a/test/test_matrixgen.py +++ b/test/test_matrixgen.py @@ -144,29 +144,29 @@ def test_qbx_direct(actx_factory, factor, lpot_id, visualize=False): actx.from_numpy(make_obj_array(np.ones((ndim, n)))) ) - result_lpot = lpot(actx, + result_lpot, = lpot(actx, targets=targets, sources=sources, centers=centers, expansion_radii=expansion_radii, - strengths=strengths, **extra_kwargs)["result_0"] + strengths=strengths, **extra_kwargs) result_lpot = actx.to_numpy(result_lpot) - mat = mat_gen(actx, + mat, = mat_gen(actx, targets=targets, sources=sources, centers=centers, - expansion_radii=expansion_radii, **extra_kwargs)["result_0"] + expansion_radii=expansion_radii, **extra_kwargs) mat = actx.to_numpy(mat) result_mat = mat @ actx.to_numpy(strengths[0]) - blk = blk_gen(actx, + blk, = blk_gen(actx, targets=targets, sources=sources, centers=centers, expansion_radii=expansion_radii, tgtindices=tgtindices, - srcindices=srcindices, **extra_kwargs)["result_0"] + srcindices=srcindices, **extra_kwargs) blk = actx.to_numpy(blk) tgtindices = actx.to_numpy(tgtindices) @@ -232,23 +232,23 @@ def test_p2p_direct(actx_factory, exclude_self, factor, lpot_id, visualize=False 
extra_kwargs["dsource_vec"] = ( actx.from_numpy(make_obj_array(np.ones((ndim, n))))) - result_lpot = lpot(actx, + result_lpot, = lpot(actx, targets=targets, sources=sources, - strength=strengths, **extra_kwargs)["result_s0"] + strength=strengths, **extra_kwargs) result_lpot = actx.to_numpy(result_lpot) - mat = mat_gen(actx, + mat, = mat_gen(actx, targets=targets, - sources=sources, **extra_kwargs)["result_0"] + sources=sources, **extra_kwargs) mat = actx.to_numpy(mat) result_mat = mat @ actx.to_numpy(strengths[0]) - blk = blk_gen(actx, + blk, = blk_gen(actx, targets=targets, sources=sources, tgtindices=tgtindices, - srcindices=srcindices, **extra_kwargs)["result_0"] + srcindices=srcindices, **extra_kwargs) blk = actx.to_numpy(blk) tgtindices = actx.to_numpy(tgtindices) diff --git a/test/test_qbx.py b/test/test_qbx.py index 2860dfb85..a9169b5cf 100644 --- a/test/test_qbx.py +++ b/test/test_qbx.py @@ -93,10 +93,10 @@ def test_direct_qbx_vs_eigval(actx_factory, expn_class, visualize=False): expansion_radii = actx.from_numpy(np.full(n, radius)) strengths = (sigma * h,) - result_qbx = lpot( + result_qbx, = lpot( actx, targets, sources, centers, strengths, - expansion_radii=expansion_radii)["result_0"] + expansion_radii=expansion_radii) error = actx.to_numpy( actx.np.linalg.norm(result_ref - result_qbx, np.inf)) @@ -170,12 +170,12 @@ def test_direct_qbx_vs_eigval_with_tgt_deriv( strengths = (sigma * h,) - result_qbx_dx = lpot_dx(actx, + result_qbx_dx, = lpot_dx(actx, targets, sources, centers, strengths, - expansion_radii=expansion_radii)["result_0"] - result_qbx_dy = lpot_dy(actx, + expansion_radii=expansion_radii) + result_qbx_dy, = lpot_dy(actx, targets, sources, centers, strengths, - expansion_radii=expansion_radii)["result_0"] + expansion_radii=expansion_radii) normals = unit_circle result_qbx = normals[0] * result_qbx_dx + normals[1] * result_qbx_dy From c3e35a5d77aabfb29311ba059c96cd50c67a146b Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Wed, 21 Sep 2022 
17:44:50 +0300 Subject: [PATCH 33/59] actually loop over all results --- sumpy/e2p.py | 8 ++++++-- sumpy/p2p.py | 8 ++++---- sumpy/qbx.py | 12 ++++-------- sumpy/tools.py | 4 ++++ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 609a5e3e2..4ad5283a7 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -69,6 +69,10 @@ def __init__(self, expansion, kernels, name=None): self.dim = expansion.dim + @property + def nresults(self): + return len(self.kernels) + def get_loopy_insns_and_result_names(self): import sumpy.symbolic as sym bvec = sym.make_sym_vector("b", self.dim) @@ -222,7 +226,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): centers=centers, rscale=rscale, **kwargs) # FIXME: cleaner way to get the names out? - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} @@ -328,7 +332,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): knl, centers=centers, rscale=rscale, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} diff --git a/sumpy/p2p.py b/sumpy/p2p.py index a12a55922..df6247ee1 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -264,7 +264,7 @@ def __call__(self, actx: PyOpenCLArrayContext, sources=sources, targets=targets, strength=strength, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} @@ -324,7 +324,7 @@ def __call__(self, actx: PyOpenCLArrayContext, targets, sources, **kwargs): sources_is_obj_array=is_obj_array_like(sources)) result = actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} @@ -441,7 +441,7 @@ 
def __call__(self, actx: PyOpenCLArrayContext, tgtindices=tgtindices, srcindices=srcindices, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} @@ -685,7 +685,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): is_cpu=actx.queue.device.type & cl.device_type.CPU) result = actx.call_loopy(knl, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} diff --git a/sumpy/qbx.py b/sumpy/qbx.py index a20a85401..a6c712110 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -139,7 +139,7 @@ def get_loopy_insns_and_result_names(self): coefficients = self._expand(sac, avec, bvec, rscale, isrc_sym) result_names = [self._evaluate(sac, avec, bvec, rscale, i, coefficients) - for i in range(len(self.target_kernels))] + for i in range(self.nresults)] logger.info("compute expansion expressions: done") @@ -311,7 +311,7 @@ def __call__(self, actx: PyOpenCLArrayContext, sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range()]) + return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) # }}} @@ -382,7 +382,7 @@ def __call__(self, actx: PyOpenCLArrayContext, sources=sources, targets=targets, center=centers, expansion_radii=expansion_radii, **kwargs) - return make_obj_array([result[f"result_{i}"] for i in range()]) + return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) # }}} @@ -508,7 +508,7 @@ def __call__(self, actx: PyOpenCLArrayContext, tgtindices=tgtindices, srcindices=srcindices, **kwargs) - return make_obj_array([result[f"result_{i}"] for i in range()]) + return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) # }}} @@ -657,7 +657,6 @@ def normal(self): self.arguments["normal"] = ( 
lp.GlobalArg("normal", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) - from pytools.obj_array import make_obj_array return make_obj_array([ prim.parse(f"normal[itgt, {i}]") for i in range(self.dim)]) @@ -669,7 +668,6 @@ def tangent(self): self.arguments["tangent"] = ( lp.GlobalArg("tangent", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) - from pytools.obj_array import make_obj_array return make_obj_array([ prim.parse(f"tangent[itgt, {i}]") for i in range(self.dim)]) @@ -692,7 +690,6 @@ def src_derivative_dir(self): lp.GlobalArg("src_derivative_dir", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) - from pytools.obj_array import make_obj_array return make_obj_array([ prim.parse(f"src_derivative_dir[itgt, {i}]") for i in range(self.dim)]) @@ -705,7 +702,6 @@ def tgt_derivative_dir(self): lp.GlobalArg("tgt_derivative_dir", self.geometry_dtype, shape=("ntargets", self.dim), order="C")) - from pytools.obj_array import make_obj_array return make_obj_array([ prim.parse(f"tgt_derivative_dir[itgt, {i}]") for i in range(self.dim)]) diff --git a/sumpy/tools.py b/sumpy/tools.py index fa0b050fb..b79f26944 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -602,6 +602,10 @@ def __init__(self, target_kernels, source_kernels, strength_usage, self.name = name or self.default_name + @property + def nresults(self): + return len(self.target_kernels) + def get_kernel_scaling_assignments(self): from sumpy.symbolic import SympyToPymbolicMapper sympy_conv = SympyToPymbolicMapper() From f6d6e9d2adceec77fe9bd48fc8ea074c48257bc9 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Wed, 21 Sep 2022 19:02:59 +0300 Subject: [PATCH 34/59] back up some more dictionary kernel accesses --- sumpy/e2e.py | 5 +++-- sumpy/fmm.py | 12 ++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 58cc01352..f67750e58 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -619,7 +619,8 @@ def __call__(self, actx: 
PyOpenCLArrayContext, **kwargs): m2l_translation_classes_dependent_data), **kwargs) - return result["tgt_expansions"] + return result["m2l_translation_classes_dependent_data"] + # }}} @@ -702,7 +703,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): preprocessed_src_expansions=preprocessed_src_expansions, **kwargs) - return result["tgt_expansions"] + return result["preprocessed_src_expansions"] # }}} diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 59c4807cc..15594ebff 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -687,8 +687,8 @@ def eval_direct(self, max_ntargets_in_one_box=self.max_ntargets_in_one_box, **kwargs) - for i in range(pot.size): - assert pot_res[f"result_s{i}"] is pot[i] + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i return pot, SumpyTimingFuture(actx.queue, []) @@ -934,8 +934,8 @@ def eval_multipoles(self, **kwargs) - for i in range(pot.size): - assert pot_res[f"result_s{i}"] is pot[i] + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i return pot, SumpyTimingFuture(actx.queue, []) @@ -1056,8 +1056,8 @@ def eval_locals(self, **kwargs) - for i in range(pot.size): - assert pot_res[f"result_s{i}"] is pot[i] + for pot_i, pot_res_i in zip(pot, pot_res): + assert pot_i is pot_res_i return pot, SumpyTimingFuture(actx.queue, []) From 0fa102f2ea1506644372eaba0ffd2eddc2f49a61 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Wed, 21 Sep 2022 19:55:22 +0300 Subject: [PATCH 35/59] fix matrix generation --- sumpy/p2p.py | 4 ++-- test/test_kernels.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index df6247ee1..5d17041e3 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -324,7 +324,7 @@ def __call__(self, actx: PyOpenCLArrayContext, targets, sources, **kwargs): sources_is_obj_array=is_obj_array_like(sources)) result = actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in 
range(self.nresults)]) + return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) # }}} @@ -441,7 +441,7 @@ def __call__(self, actx: PyOpenCLArrayContext, tgtindices=tgtindices, srcindices=srcindices, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) + return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) # }}} diff --git a/test/test_kernels.py b/test/test_kernels.py index afef0b7fb..3b53387f5 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -84,15 +84,15 @@ def test_p2p(actx_factory, exclude_self): extra_kwargs["target_to_source"] = ( actx.from_numpy(np.arange(n, dtype=np.int32))) - result, = knl( + potential, potential_ref = knl( actx, actx.from_numpy(targets), actx.from_numpy(sources), [actx.from_numpy(strengths)], **extra_kwargs) - potential = actx.to_numpy(result) - potential_ref = np.empty_like(potential) + potential = actx.to_numpy(potential) + potential_ref = np.empty_like(potential_ref) targets = targets.T sources = sources.T From a7bb63a23299b2fd22b0aed3a2e69be43d66f542 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sun, 25 Sep 2022 10:06:22 +0300 Subject: [PATCH 36/59] rip out timing collection --- sumpy/fmm.py | 91 ++++------------------------------------ test/test_distributed.py | 5 +-- test/test_fmm.py | 6 +-- 3 files changed, 10 insertions(+), 92 deletions(-) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 15594ebff..47453797f 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -55,10 +55,6 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): necessarily must have an :class:`arraycontext.ArrayContext`, but it is allowed to be more ephemeral than the code, the code's lifetime is decoupled by storing it in this object. - - Timing results returned by this wrangler contain the values *wall_elapsed* - which measures elapsed wall time. This requires an array container with - profiling enabled. 
""" def __init__(self, array_context: PyOpenCLArrayContext, @@ -186,69 +182,6 @@ def opencl_fft_app(self, shape, dtype, inverse): # }}} -# {{{ timing future - -_SECONDS_PER_NANOSECOND = 1e-9 - - -""" -EventLike objects have an attribute native_event that returns -a cl.Event that indicates the end of the event. -""" -EventLike = TypeVar("CLEventLike") - - -class UnableToCollectTimingData(UserWarning): - pass - - -class SumpyTimingFuture: - - def __init__(self, queue, events: List[EventLike]): - self.queue = queue - self.events = events - - @property - def native_events(self) -> List[EventLike]: - import pyopencl as cl - return [evt if isinstance(evt, cl.Event) else evt.native_event - for evt in self.events] - - @memoize_method - def result(self): - import pyopencl as cl - from boxtree.timing import TimingResult - - if not self.queue.properties & cl.command_queue_properties.PROFILING_ENABLE: - from warnings import warn - warn( - "Profiling was not enabled in the command queue. " - "Timing data will not be collected.", - category=UnableToCollectTimingData, - stacklevel=3) - return TimingResult(wall_elapsed=None) - - if self.events: - cl.wait_for_events(self.native_events) - - result = 0 - for event in self.events: - result += ( - (event.profile.end - event.profile.start) - * _SECONDS_PER_NANOSECOND) - - return TimingResult(wall_elapsed=result) - - def done(self): - import pyopencl as cl - return all( - event.get_info(cl.event_info.COMMAND_EXECUTION_STATUS) - == cl.command_execution_status.COMPLETE - for event in self.native_events) - -# }}} - - # {{{ expansion wrangler class SumpyExpansionWrangler(ExpansionWranglerInterface): @@ -284,7 +217,6 @@ def __init__(self, tree_indep, traversal, dtype, fmm_level_to_order, preprocessed_mpole_dtype=None, *, _disable_translation_classes=False): super().__init__(tree_indep, traversal) - self.issued_timing_data_warning = False self.dtype = dtype @@ -609,8 +541,7 @@ def form_multipoles(self, assert mpoles_res is mpoles_view - # 
FIXME: rip out SumpyTimingFuture - return mpoles, SumpyTimingFuture(actx.queue, []) + return mpoles def coarsen_multipoles(self, actx: PyOpenCLArrayContext, @@ -664,7 +595,7 @@ def coarsen_multipoles(self, assert mpoles_res is target_mpoles_view - return mpoles, SumpyTimingFuture(actx.queue, []) + return mpoles def eval_direct(self, actx: PyOpenCLArrayContext, @@ -690,7 +621,7 @@ def eval_direct(self, for pot_i, pot_res_i in zip(pot, pot_res): assert pot_i is pot_res_i - return pot, SumpyTimingFuture(actx.queue, []) + return pot @memoize_method def multipole_to_local_precompute(self): @@ -789,10 +720,6 @@ def multipole_to_local(self, mpole_exps_view_func = self.multipole_expansions_view local_exps_view_func = self.local_expansions_view - preprocess_evts = [] - translate_evts = [] - postprocess_evts = [] - for lev in range(self.tree.nlevels): wait_for = [] @@ -894,9 +821,7 @@ def multipole_to_local(self, **self.kernel_extra_kwargs, ) - timing_events = preprocess_evts + translate_evts + postprocess_evts - - return (local_exps, SumpyTimingFuture(actx.queue, timing_events)) + return local_exps def eval_multipoles(self, actx: PyOpenCLArrayContext, @@ -937,7 +862,7 @@ def eval_multipoles(self, for pot_i, pot_res_i in zip(pot, pot_res): assert pot_i is pot_res_i - return pot, SumpyTimingFuture(actx.queue, []) + return pot def form_locals(self, actx: PyOpenCLArrayContext, @@ -978,7 +903,7 @@ def form_locals(self, assert result is target_local_exps_view - return local_exps, SumpyTimingFuture(actx.queue, []) + return local_exps def refine_locals(self, actx: PyOpenCLArrayContext, @@ -1021,7 +946,7 @@ def refine_locals(self, assert local_exps_res is target_local_exps_view - return local_exps, SumpyTimingFuture(actx.queue, []) + return local_exps def eval_locals(self, actx: PyOpenCLArrayContext, @@ -1059,7 +984,7 @@ def eval_locals(self, for pot_i, pot_res_i in zip(pot, pot_res): assert pot_i is pot_res_i - return pot, SumpyTimingFuture(actx.queue, []) + return pot def 
finalize_potentials(self, actx: PyOpenCLArrayContext, potentials): return potentials diff --git a/test/test_distributed.py b/test/test_distributed.py index 26313fa96..e436fca37 100644 --- a/test/test_distributed.py +++ b/test/test_distributed.py @@ -128,10 +128,7 @@ def wrangler_factory(local_traversal, global_traversal): distribued_fmm_info = DistributedFMMRunner( actx, global_tree_dev, traversal_builder, wrangler_factory, comm=comm) - timing_data = {} - distributed_potential = distribued_fmm_info.drive_dfmm( - actx, [sources_weights], timing_data=timing_data) - assert timing_data + distributed_potential = distribued_fmm_info.drive_dfmm(actx, [sources_weights]) if mpi_rank == 0: assert shmem_potential.shape == (1,) diff --git a/test/test_fmm.py b/test/test_fmm.py index f5e0c8d3e..08964ccaa 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -485,11 +485,7 @@ def test_sumpy_fmm_timing_data_collection(ctx_factory, use_fft, visualize=False) fmm_level_to_order=lambda kernel, kernel_args, tree, lev: order) from boxtree.fmm import drive_fmm - timing_data = {} - pot, = drive_fmm(actx, wrangler, (weights,), timing_data=timing_data) - logger.info("timing_data:\n%s", timing_data) - - assert timing_data + pot, = drive_fmm(actx, wrangler, (weights,)) def test_sumpy_fmm_exclude_self(actx_factory, visualize=False): From 567b947acadfbe5ed8618501dda47a80088b5aa5 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sun, 25 Sep 2022 10:19:02 +0300 Subject: [PATCH 37/59] remove unused imports (flake8) --- sumpy/fmm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 7cc7fb183..a6232f079 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -44,8 +44,6 @@ M2LPreprocessMultipole, M2LPostprocessLocal) from sumpy.tools import (to_complex_dtype, run_opencl_fft, get_opencl_fft_app) -from typing import TypeVar, List - # {{{ tree-independent data for wrangler From ea7656dc1b1a539a8112cf8c91086c628508c0fb Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: 
Sun, 25 Sep 2022 11:00:28 +0300 Subject: [PATCH 38/59] fix return value for form_locals --- test/test_fmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_fmm.py b/test/test_fmm.py index 08964ccaa..a7732b562 100644 --- a/test/test_fmm.py +++ b/test/test_fmm.py @@ -318,7 +318,7 @@ def fmm_level_to_order(kernel, kernel_args, tree, lev): weights = wrangler.reorder_sources(weights) (weights,) = wrangler.distribute_source_weights(actx, (weights,), None) - local_result, _ = wrangler.form_locals( + local_result = wrangler.form_locals( actx, trav.level_start_target_or_target_parent_box_nrs, trav.target_or_target_parent_boxes, From bd5f5783e67355d8d05601778428fd368fe82c4e Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 26 Sep 2022 09:44:34 +0300 Subject: [PATCH 39/59] remove ctx arg in KernelComputation --- sumpy/tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index 46bbe3227..70f65a79f 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -567,7 +567,7 @@ class KernelComputation(ABC): .. 
automethod:: get_kernel """ - def __init__(self, ctx: Any, + def __init__(self, target_kernels: List["Kernel"], source_kernels: List["Kernel"], strength_usage: Optional[List[int]] = None, From 4c3d8385f642296e29e129274250958b0eb48681 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 29 Sep 2022 11:09:39 +0300 Subject: [PATCH 40/59] back out some unneeded changes --- sumpy/__init__.py | 3 +- sumpy/array_context.py | 8 ------ sumpy/e2e.py | 14 ++++----- sumpy/e2p.py | 9 +++--- sumpy/fmm.py | 65 +++++++++++++++++++++--------------------- sumpy/p2e.py | 7 ++--- sumpy/p2p.py | 25 +++++++++------- sumpy/qbx.py | 18 ++++++++---- sumpy/tools.py | 1 + sumpy/toys.py | 28 +++++++++--------- 10 files changed, 89 insertions(+), 89 deletions(-) diff --git a/sumpy/__init__.py b/sumpy/__init__.py index 2cbc40a61..eb0489491 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -24,7 +24,8 @@ from sumpy.p2p import P2P, P2PFromCSR from sumpy.p2e import P2EFromSingleBox, P2EFromCSR from sumpy.e2p import E2PFromSingleBox, E2PFromCSR -from sumpy.e2e import (E2EFromCSR, E2EFromChildren, E2EFromParent, +from sumpy.e2e import ( + E2EFromCSR, E2EFromChildren, E2EFromParent, M2LUsingTranslationClassesDependentData, M2LGenerateTranslationClassesDependentData, M2LPreprocessMultipole, M2LPostprocessLocal) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index a369b56bb..04f31568b 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -79,14 +79,6 @@ def make_loopy_program( class PyOpenCLArrayContext(PyOpenCLArrayContextBase): def transform_loopy_program(self, t_unit): - for name in t_unit.entrypoints: - options = t_unit[name].options - if not (options.return_dict and options.no_numpy): - raise ValueError( - f"loopy kernel '{name}' passed to call_loopy must " - "have 'return_dict' and 'no_numpy' options set. 
" - "Did you use 'make_loopy_program' to create this kernel?") - return t_unit diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 9523fc4d9..f9f153ed6 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -236,18 +236,18 @@ def get_kernel(self): ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", fixed_parameters=dict(dim=self.dim), ) + for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: + loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.set_options(loopy_knl, enforce_variable_access_ordered="no_check") - for knl in [self.src_expansion.kernel, self.tgt_expansion.kernel]: - loopy_knl = knl.prepare_loopy_kernel(loopy_knl) - return loopy_knl def get_optimized_kernel(self): @@ -499,7 +499,7 @@ def get_optimized_kernel(self, result_dtype): return knl - def __call__(self, actx, **kwargs): + def __call__(self, actx: PyOpenCLArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -897,8 +897,8 @@ def get_kernel(self): ... ] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), ) @@ -1004,8 +1004,8 @@ def get_kernel(self): ... 
] + gather_loopy_arguments([self.src_expansion, self.tgt_expansion]), name=self.name, - silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", fixed_parameters=dict(dim=self.dim, nchildren=2**self.dim), ) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index cd7f8082d..f773743f5 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -59,8 +59,8 @@ def __init__(self, expansion, kernels, name=None): SourceTransformationRemover, TargetTransformationRemover) sxr = SourceTransformationRemover() txr = TargetTransformationRemover() - expansion = expansion.with_kernel(sxr(expansion.kernel)) + kernels = [sxr(knl) for knl in kernels] for knl in kernels: assert txr(knl) == expansion.kernel @@ -197,7 +197,6 @@ def get_kernel(self): ) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -233,7 +232,6 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): knl, centers=centers, rscale=rscale, **kwargs) - # FIXME: cleaner way to get the names out? 
return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} @@ -316,7 +314,6 @@ def get_kernel(self): loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.prioritize_loops(loopy_knl, "itgt_box,itgt,isrc_box") - for knl in self.kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) @@ -340,7 +337,9 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): knl = self.get_cached_optimized_kernel() result = actx.call_loopy( knl, - centers=centers, rscale=rscale, **kwargs) + centers=centers, + rscale=rscale, + **kwargs) return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index a6232f079..1ad6eee17 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -42,7 +42,7 @@ E2EFromChildren, E2EFromParent, M2LGenerateTranslationClassesDependentData, M2LPreprocessMultipole, M2LPostprocessLocal) -from sumpy.tools import (to_complex_dtype, run_opencl_fft, get_opencl_fft_app) +from sumpy.tools import to_complex_dtype, run_opencl_fft, get_opencl_fft_app # {{{ tree-independent data for wrangler @@ -50,12 +50,12 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): """Objects of this type serve as a place to keep the code needed for :class:`SumpyExpansionWrangler`. Since :class:`SumpyExpansionWrangler` - necessarily must have an :class:`arraycontext.ArrayContext`, but it - is allowed to be more ephemeral than the code, the code's lifetime - is decoupled by storing it in this object. + contains data that is allowed to be more ephemeral than the code, the code's + lifetime is decoupled by storing it in this object. 
""" - def __init__(self, array_context: PyOpenCLArrayContext, + def __init__(self, + array_context: PyOpenCLArrayContext, multipole_expansion_factory, local_expansion_factory, target_kernels, exclude_self=False, use_rscale=None, @@ -70,6 +70,8 @@ def __init__(self, array_context: PyOpenCLArrayContext, :arg strength_usage: passed unchanged to p2l, p2m and p2p. :arg source_kernels: passed unchanged to p2l, p2m and p2p. """ + super().__init__() + self._setup_actx = array_context self.multipole_expansion_factory = multipole_expansion_factory @@ -80,8 +82,6 @@ def __init__(self, array_context: PyOpenCLArrayContext, self.use_rscale = use_rscale self.strength_usage = strength_usage - super().__init__() - @memoize_method def get_base_kernel(self): from pytools import single_valued @@ -207,7 +207,8 @@ class SumpyExpansionWrangler(ExpansionWranglerInterface): Type for the preprocessed multipole expansion if used for M2L. """ - def __init__(self, tree_indep, traversal, dtype, fmm_level_to_order, + def __init__(self, + tree_indep, traversal, dtype, fmm_level_to_order, source_extra_kwargs=None, kernel_extra_kwargs=None, self_extra_kwargs=None, @@ -266,10 +267,6 @@ def __init__(self, tree_indep, traversal, dtype, fmm_level_to_order, self.translation_classes_data = translation_classes_data - @property - def _setup_actx(self): - return self.tree_indep._setup_actx - def level_to_rscale(self, level): tree = self.tree order = self.level_orders[level] @@ -294,7 +291,10 @@ def level_to_rscale(self, level): @property @memoize_method def tree_level_start_box_nrs(self): - return self._setup_actx.to_numpy(self.tree.level_start_box_nrs) + # NOTE: a host version of `level_start_box_nrs` is used repeatedly and + # this simply caches it to avoid repeated transfers + actx = self.tree_indep._setup_actx + return actx.to_numpy(self.tree.level_start_box_nrs) def _expansions_level_starts(self, order_to_size): return build_csr_level_starts(self.level_orders, order_to_size, @@ -312,7 +312,7 @@ def 
local_expansions_level_starts(self): @memoize_method def m2l_translation_class_level_start_box_nrs(self): - actx = self._setup_actx + actx = self.tree_indep._setup_actx return actx.to_numpy( self.translation_classes_data .from_sep_siblings_translation_classes_level_starts) @@ -369,25 +369,25 @@ def m2l_translation_classes_dependent_data_zeros( return result def multipole_expansions_view(self, mpole_exps, level): - expn_start, expn_stop = \ - self.multipole_expansions_level_starts()[level:level+2] - box_start, box_stop = self.tree_level_start_box_nrs[level:level+2] + expn_start, expn_stop = ( + self.multipole_expansions_level_starts()[level:level + 2]) + box_start, box_stop = self.tree_level_start_box_nrs[level:level + 2] return (box_start, mpole_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) def local_expansions_view(self, local_exps, level): - expn_start, expn_stop = \ - self.local_expansions_level_starts()[level:level+2] - box_start, box_stop = self.tree_level_start_box_nrs[level:level+2] + expn_start, expn_stop = ( + self.local_expansions_level_starts()[level:level + 2]) + box_start, box_stop = self.tree_level_start_box_nrs[level:level + 2] return (box_start, local_exps[expn_start:expn_stop].reshape(box_stop-box_start, -1)) def m2l_translation_classes_dependent_data_view(self, m2l_translation_classes_dependent_data, level): - translation_class_start, _ = \ - self.m2l_translation_class_level_start_box_nrs()[level:level+2] + translation_class_start, _ = ( + self.m2l_translation_class_level_start_box_nrs()[level:level + 2]) exprs_level = m2l_translation_classes_dependent_data[level] return (translation_class_start, exprs_level) @@ -426,8 +426,7 @@ def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): m2l_work_array_view = m2l_preproc_mpole_expansions_view m2l_work_array_zeros = m2l_preproc_mpole_expansion_zeros - m2l_work_array_level_starts = \ - m2l_preproc_mpole_expansions_level_starts + m2l_work_array_level_starts = 
m2l_preproc_mpole_expansions_level_starts def output_zeros(self, actx: PyOpenCLArrayContext) -> np.ndarray: """Return a potentials array (which must support addition) capable of @@ -459,7 +458,7 @@ def reorder(x): @property @memoize_method def max_nsources_in_one_box(self): - actx = self._setup_actx + actx = self.tree_indep._setup_actx return actx.to_numpy( actx.np.max(self.tree.box_source_counts_nonchild) ).item() @@ -467,7 +466,7 @@ def max_nsources_in_one_box(self): @property @memoize_method def max_ntargets_in_one_box(self): - actx = self._setup_actx + actx = self.tree_indep._setup_actx return actx.to_numpy( actx.np.max(self.tree.box_target_counts_nonchild) ).item() @@ -570,10 +569,10 @@ def coarsen_multipoles(self, self.level_orders[source_level], self.level_orders[target_level]) - source_level_start_ibox, source_mpoles_view = \ - self.multipole_expansions_view(mpoles, source_level) - target_level_start_ibox, target_mpoles_view = \ - self.multipole_expansions_view(mpoles, target_level) + source_level_start_ibox, source_mpoles_view = ( + self.multipole_expansions_view(mpoles, source_level)) + target_level_start_ibox, target_mpoles_view = ( + self.multipole_expansions_view(mpoles, target_level)) mpoles_res = m2m( actx, @@ -623,7 +622,7 @@ def eval_direct(self, @memoize_method def multipole_to_local_precompute(self): - actx = self._setup_actx + actx = self.tree_indep._setup_actx result = [] m2l_translation_classes_dependent_data = ( @@ -642,8 +641,8 @@ def multipole_to_local_precompute(self): self.m2l_translation_classes_dependent_data_view( m2l_translation_classes_dependent_data, lev) - ntranslation_classes = \ - m2l_translation_classes_dependent_data_view.shape[0] + ntranslation_classes = ( + m2l_translation_classes_dependent_data_view.shape[0]) if ntranslation_classes == 0: result.append(actx.np.zeros_like( diff --git a/sumpy/p2e.py b/sumpy/p2e.py index dce7127b5..558e8f357 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -215,17 +215,16 @@ def 
get_kernel(self): ] + gather_loopy_source_arguments( self.source_kernels + (self.expansion,)), name=self.name, - silenced_warnings="write_race(write_expn*)", assumptions="nsrc_boxes>=1", + silenced_warnings="write_race(write_expn*)", fixed_parameters=dict( dim=self.dim, strength_count=self.strength_count), ) - loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") - for knl in self.source_kernels: loopy_knl = knl.prepare_loopy_kernel(loopy_knl) + loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") return loopy_knl @@ -337,8 +336,8 @@ def get_kernel(self): """], kernel_data=arguments, name=self.name, - silenced_warnings="write_race(write_expn*)", assumptions="ntgt_boxes>=1", + silenced_warnings="write_race(write_expn*)", fixed_parameters=dict( dim=self.dim, strength_count=self.strength_count), diff --git a/sumpy/p2p.py b/sumpy/p2p.py index f8a392da0..6d4ea211d 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -237,8 +237,8 @@ def get_kernel(self): """ for iknl in range(len(self.target_kernels))] + ["end"], kernel_data=arguments, - name=self.name, assumptions="nsources>=1 and ntargets>=1", + name=self.name, fixed_parameters=dict( dim=self.dim, nstrengths=self.strength_count, @@ -255,12 +255,14 @@ def get_kernel(self): def __call__(self, actx: PyOpenCLArrayContext, targets, sources, strength, **kwargs): knl = self.get_cached_optimized_kernel( - targets_is_obj_array=is_obj_array_like(targets), - sources_is_obj_array=is_obj_array_like(sources)) + targets_is_obj_array=is_obj_array_like(targets), + sources_is_obj_array=is_obj_array_like(sources)) result = actx.call_loopy( knl, - sources=sources, targets=targets, strength=strength, + sources=sources, + targets=targets, + strength=strength, **kwargs) return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) @@ -307,8 +309,8 @@ def get_kernel(self): """ for iknl in range(len(self.target_kernels))] + ["end"], arguments, - name=self.name, assumptions="nsources>=1 and ntargets>=1", + name=self.name, 
fixed_parameters=dict(dim=self.dim), ) @@ -386,9 +388,9 @@ def get_kernel(self): """ for iknl in range(len(self.target_kernels))] + ["end"], arguments, + assumptions="nresult>=1", silenced_warnings="write_race(write_p2p*)", name=self.name, - assumptions="nresult>=1", fixed_parameters=dict(dim=self.dim), ) @@ -629,13 +631,13 @@ def get_kernel(self, loopy_knl = make_loopy_program( domains, instructions, - arguments, + kernel_data=arguments, + assumptions="ntgt_boxes>=1", name=self.name, silenced_warnings=[ "write_race(write_csr*)", "write_race(prefetch_src)", "write_race(prefetch_charge)"], - assumptions="ntgt_boxes>=1", fixed_parameters=dict( dim=self.dim, nstrengths=self.strength_count, @@ -659,7 +661,8 @@ def get_kernel(self, return loopy_knl def get_optimized_kernel(self, - max_nsources_in_one_box: int, max_ntargets_in_one_box: int, + max_nsources_in_one_box: int, + max_ntargets_in_one_box: int, is_cpu: bool): if is_cpu: knl = self.get_kernel(max_nsources_in_one_box, @@ -681,13 +684,13 @@ def get_optimized_kernel(self, return knl def __call__(self, actx: PyOpenCLArrayContext, **kwargs): - import pyopencl as cl + from sumpy.array_context import is_cl_cpu max_nsources_in_one_box = kwargs.pop("max_nsources_in_one_box") max_ntargets_in_one_box = kwargs.pop("max_ntargets_in_one_box") knl = self.get_cached_optimized_kernel( max_nsources_in_one_box=max_nsources_in_one_box, max_ntargets_in_one_box=max_ntargets_in_one_box, - is_cpu=actx.queue.device.type & cl.device_type.CPU) + is_cpu=is_cl_cpu(actx)) result = actx.call_loopy(knl, **kwargs) return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 54d8b4d68..6701be944 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -280,8 +280,8 @@ def get_kernel(self): + ["end"], kernel_data=arguments, name=self.name, - silenced_warnings="write_race(write_lpot*)", assumptions="ntargets>=1 and nsources>=1", + silenced_warnings="write_race(write_lpot*)", 
fixed_parameters=dict(dim=self.dim), ) @@ -310,8 +310,11 @@ def __call__(self, actx: PyOpenCLArrayContext, result = actx.call_loopy( knl, - sources=sources, targets=targets, center=centers, - expansion_radii=expansion_radii, **kwargs) + sources=sources, + targets=targets, + center=centers, + expansion_radii=expansion_radii, + **kwargs) return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) @@ -383,8 +386,11 @@ def __call__(self, actx: PyOpenCLArrayContext, result = actx.call_loopy( knl, - sources=sources, targets=targets, center=centers, - expansion_radii=expansion_radii, **kwargs) + sources=sources, + targets=targets, + center=centers, + expansion_radii=expansion_radii, + **kwargs) return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) @@ -447,8 +453,8 @@ def get_kernel(self): + ["end"], kernel_data=arguments, name=self.name, - silenced_warnings="write_race(write_lpot*)", assumptions="nresult>=1", + silenced_warnings="write_race(write_lpot*)", fixed_parameters=dict(dim=self.dim), ) diff --git a/sumpy/tools.py b/sumpy/tools.py index 70f65a79f..9c7aaa849 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -1141,6 +1141,7 @@ def loopy_fft(shape, inverse, complex_dtype, index_dtype=None, kernel_data=kernel_data, name=name, fixed_parameters=fixed_parameters, + index_dtype=index_dtype, ) if broadcast_dims: diff --git a/sumpy/toys.py b/sumpy/toys.py index eb68522f3..007ac2531 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -137,50 +137,50 @@ def get_p2p(self): def get_p2m(self, order): from sumpy import P2EFromSingleBox return P2EFromSingleBox( - self.mpole_expn_class(self.no_target_deriv_kernel, order), - kernels=(self.kernel,)) + self.mpole_expn_class(self.no_target_deriv_kernel, order), + kernels=(self.kernel,)) @memoize_method def get_p2l(self, order): from sumpy import P2EFromSingleBox return P2EFromSingleBox( - self.local_expn_class(self.no_target_deriv_kernel, order), - kernels=(self.kernel,)) + 
self.local_expn_class(self.no_target_deriv_kernel, order), + kernels=(self.kernel,)) @memoize_method def get_m2p(self, order): from sumpy import E2PFromSingleBox return E2PFromSingleBox( - self.mpole_expn_class(self.no_target_deriv_kernel, order), - (self.kernel,)) + self.mpole_expn_class(self.no_target_deriv_kernel, order), + (self.kernel,)) @memoize_method def get_l2p(self, order): from sumpy import E2PFromSingleBox return E2PFromSingleBox( - self.local_expn_class(self.no_target_deriv_kernel, order), - (self.kernel,)) + self.local_expn_class(self.no_target_deriv_kernel, order), + (self.kernel,)) @memoize_method def get_m2m(self, from_order, to_order): from sumpy import E2EFromCSR return E2EFromCSR( - self.mpole_expn_class(self.no_target_deriv_kernel, from_order), - self.mpole_expn_class(self.no_target_deriv_kernel, to_order)) + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.mpole_expn_class(self.no_target_deriv_kernel, to_order)) @memoize_method def get_m2l(self, from_order, to_order): from sumpy import E2EFromCSR return E2EFromCSR( - self.mpole_expn_class(self.no_target_deriv_kernel, from_order), - self.local_expn_class(self.no_target_deriv_kernel, to_order)) + self.mpole_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) @memoize_method def get_l2l(self, from_order, to_order): from sumpy import E2EFromCSR return E2EFromCSR( - self.local_expn_class(self.no_target_deriv_kernel, from_order), - self.local_expn_class(self.no_target_deriv_kernel, to_order)) + self.local_expn_class(self.no_target_deriv_kernel, from_order), + self.local_expn_class(self.no_target_deriv_kernel, to_order)) # }}} From 60e194dfbe6e555da17895fc41b9edcb0256978e Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 29 Sep 2022 11:19:26 +0300 Subject: [PATCH 41/59] point ci to updated pytential --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b48e98298..646da42db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -101,9 +101,9 @@ jobs: run: | curl -L -O https://tiker.net/ci-support-v0 . ./ci-support-v0 - # if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "fft" ]]; then - # DOWNSTREAM_PROJECT=https://github.com/isuruf/pytential.git@pyvkfft - # fi + if [[ "$DOWNSTREAM_PROJECT" == "pytential" && "$GITHUB_HEAD_REF" == "towards-array-context" ]]; then + DOWNSTREAM_PROJECT=https://github.com/alexfikl/pytential.git@towards-array-context + fi test_downstream "$DOWNSTREAM_PROJECT" # vim: sw=4 From 45a023e74983c161cac0eedfa6af5343b4608384 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Thu, 29 Sep 2022 14:33:34 +0300 Subject: [PATCH 42/59] fix kwargs name --- sumpy/distributed.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sumpy/distributed.py b/sumpy/distributed.py index ee257b7e2..ac3bcefaf 100644 --- a/sumpy/distributed.py +++ b/sumpy/distributed.py @@ -32,9 +32,10 @@ def __init__( self, actx: PyOpenCLArrayContext, comm, tree_indep, local_traversal, global_traversal, dtype, fmm_level_to_order, communicate_mpoles_via_allreduce=False, - **kwarg): + **kwargs): SumpyExpansionWrangler.__init__( - self, tree_indep, local_traversal, dtype, fmm_level_to_order, **kwarg) + self, tree_indep, local_traversal, dtype, fmm_level_to_order, + **kwargs) self.comm = comm self.traversal_in_device_memory = True From e28d29505a53cf5e32d44510ac6c49dcecdaeca0 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sun, 30 Oct 2022 10:47:50 +0200 Subject: [PATCH 43/59] fix merge --- sumpy/toys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sumpy/toys.py b/sumpy/toys.py index d865dbfbc..1d8b026b7 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -629,7 +629,7 @@ def local_expand( def logplot( actx: PyOpenCLArrayContext, - fp: FieldPlotter, + fp: "FieldPlotter", psource: PotentialSource, 
**kwargs) -> None: fp.show_scalar_in_matplotlib( np.log10(np.abs(psource.eval(actx, fp.points) + 1e-15)), **kwargs) From 53933e7685793e9215c3b4c37a85493a0d48e3c9 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 28 Apr 2023 10:34:07 +0300 Subject: [PATCH 44/59] docs: add pytools to intersphinx --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index 4f487d994..917c4bcfe 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -19,6 +19,7 @@ "arraycontext": ("https://documen.tician.de/arraycontext/", None), "python": ("https://docs.python.org/3/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "pytools": ("https://documen.tician.de/pytools/", None), "modepy": ("https://documen.tician.de/modepy/", None), "pyopencl": ("https://documen.tician.de/pyopencl/", None), "pymbolic": ("https://documen.tician.de/pymbolic/", None), From b7203e20e3c4ad7a8f9811e5eef951aa70b205fe Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 16 Jun 2023 11:06:08 +0300 Subject: [PATCH 45/59] fix device handling in p2p --- sumpy/p2p.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 28cddb20d..72007e910 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -91,9 +91,6 @@ def __init__(self, target_kernels, exclude_self, strength_usage=None, source_kernels=source_kernels, strength_usage=strength_usage, value_dtypes=value_dtypes, name=name) - import pyopencl as cl - self.is_gpu = not (self.device.type & cl.device_type.CPU) - self.exclude_self = exclude_self self.dim = single_valued([ knl.dim for knl in self.target_kernels + self.source_kernels @@ -194,9 +191,6 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") - from sumpy.codegen import register_optimization_preambles - knl = register_optimization_preambles(knl, self.device) - return knl @@ -264,6 +258,9 @@ def 
__call__(self, actx: PyOpenCLArrayContext, targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) + from sumpy.codegen import register_optimization_preambles + knl = register_optimization_preambles(knl, actx.queue.device) + result = actx.call_loopy( knl, sources=sources, @@ -332,6 +329,9 @@ def __call__(self, actx: PyOpenCLArrayContext, targets, sources, **kwargs): targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) + from sumpy.codegen import register_optimization_preambles + knl = register_optimization_preambles(knl, actx.queue.device) + result = actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) return make_obj_array([result[f"result_{i}"] for i in range(self.nresults)]) @@ -445,6 +445,9 @@ def __call__(self, actx: PyOpenCLArrayContext, targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) + from sumpy.codegen import register_optimization_preambles + knl = register_optimization_preambles(knl, actx.queue.device) + result = actx.call_loopy( knl, targets=targets, @@ -680,8 +683,9 @@ def get_optimized_kernel(self, max_nsources_in_one_box: int, max_ntargets_in_one_box: int, dtype_size: int, + local_mem_size: int, is_gpu: bool): - if not self.is_gpu: + if not is_gpu: knl = self.get_kernel(max_nsources_in_one_box, max_ntargets_in_one_box, is_gpu=is_gpu) knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0") @@ -692,7 +696,7 @@ def get_optimized_kernel(self, (self.dim + self.strength_count) * dtype_size # multiplying by 2 here to make sure at least 2 work groups # can be scheduled at the same time for latency hiding - nprefetch = (2 * total_local_mem - 1) // self.device.local_mem_size + 1 + nprefetch = (2 * total_local_mem - 1) // local_mem_size + 1 knl = self.get_kernel(max_nsources_in_one_box, max_ntargets_in_one_box, @@ -739,9 +743,6 @@ def get_optimized_kernel(self, knl = lp.set_options(knl, 
enforce_variable_access_ordered="no_check") - from sumpy.codegen import register_optimization_preambles - knl = register_optimization_preambles(knl, self.device) - return knl def __call__(self, actx: PyOpenCLArrayContext, **kwargs): @@ -759,8 +760,12 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): max_nsources_in_one_box=max_nsources_in_one_box, max_ntargets_in_one_box=max_ntargets_in_one_box, dtype_size=dtype_size, + local_mem_size=actx.queue.device.local_mem_size, is_gpu=is_gpu) + from sumpy.codegen import register_optimization_preambles + knl = register_optimization_preambles(knl, actx.queue.device) + result = actx.call_loopy(knl, **kwargs) return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) From 51ac5f5e8f11f7ad591693ed378df6d2664bb926 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 5 Aug 2023 19:20:44 +0300 Subject: [PATCH 46/59] fix bad merge --- sumpy/tools.py | 4 ++-- sumpy/toys.py | 6 ++---- test/test_kernels.py | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/sumpy/tools.py b/sumpy/tools.py index c3b814092..04ec4739b 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -926,8 +926,8 @@ def get_opencl_fft_app( def run_opencl_fft( actx: PyOpenCLArrayContext, - fft_app: Tuple[Any, FFTBackend], *, - input_vec: Any, + fft_app: Tuple[Any, FFTBackend], + input_vec: Any, *, inverse: bool = False, wait_for: List["pyopencl.Event"] = None) -> Tuple["pyopencl.Event", Any]: """Runs an FFT on input_vec and returns a :class:`MarkerBasedProfilingEvent` diff --git a/sumpy/toys.py b/sumpy/toys.py index 7f3f8c5c5..c4c5b70ba 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -1,3 +1,5 @@ +from __future__ import annotations + __copyright__ = """ Copyright (C) 2017 Andreas Kloeckner Copyright (C) 2017 Matt Wala @@ -23,8 +25,6 @@ THE SOFTWARE. 
""" -from __future__ import annotations - from numbers import Number from functools import partial from typing import Any, Sequence, Union, Optional, TYPE_CHECKING @@ -244,8 +244,6 @@ def _e2p(actx, psource, targets, e2p): np.array(psource.center, dtype=np.float64).reshape(toy_ctx.kernel.dim, 1)) from pytools.obj_array import make_obj_array - from sumpy.tools import vector_to_device - coeffs = actx.from_numpy(np.array([psource.coeffs])) pot, = e2p( actx, diff --git a/test/test_kernels.py b/test/test_kernels.py index f7f8bd661..05deecceb 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -32,7 +32,6 @@ from sumpy.array_context import ( # noqa: F401 PytestPyOpenCLArrayContextFactory, _acf) -from pytools.obj_array import make_obj_array import sumpy.symbolic as sym from sumpy.expansion.multipole import ( VolumeTaylorMultipoleExpansion, From 4fe4f01b0d9094abeec0f331a7a60b9ff6f4fed8 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 17 Oct 2023 11:27:07 +0300 Subject: [PATCH 47/59] fix bad merge --- sumpy/toys.py | 8 +- test/test_kernels.py | 203 ++++++------------------------------------- 2 files changed, 32 insertions(+), 179 deletions(-) diff --git a/sumpy/toys.py b/sumpy/toys.py index 24b137b2b..09b72da20 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -420,7 +420,7 @@ def _m2l(actx: PyOpenCLArrayContext, m2l_translation_classes_dependent_data, inverse=False) - ret = _e2e(psource, to_center, to_rscale, to_order, + ret = _e2e(actx, psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs, { "src_expansions": preprocessed_src_expansions, @@ -454,8 +454,10 @@ def _m2l(actx: PyOpenCLArrayContext, actx.to_numpy(to_coeffs)[0], derived_from=psource, **expn_kwargs) else: - ret = _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, - expn_kwargs, {}) + ret = _e2e( + actx, + psource, to_center, to_rscale, to_order, e2e, expn_class, + expn_kwargs, {}) return ret diff --git a/test/test_kernels.py b/test/test_kernels.py index 
33da6380a..3d2babae2 100644 --- a/test/test_kernels.py +++ b/test/test_kernels.py @@ -20,8 +20,8 @@ THE SOFTWARE. """ -import pytest import sys +import pytest from functools import partial import numpy as np @@ -179,7 +179,6 @@ def test_p2e_multiple(actx_factory, base_knl, expn_class): + center[:, np.newaxis]) centers = actx.from_numpy(centers) - centers = actx.from_numpy(centers) rscale = 0.5 # pick something non-1 # apply p2e at the same time @@ -557,40 +556,13 @@ def test_translations(actx_factory, knl, local_expn_class, mpole_expn_class, else: orders = [3, 4, 5] - nboxes = centers.shape[-1] - - def eval_at(e2p, source_box_nr, rscale): - e2p_target_boxes = actx.from_numpy( - np.array([source_box_nr], dtype=np.int32)) - - # These are indexed by global box numbers. - e2p_box_target_starts = actx.from_numpy( - np.array([0, 0, 0, 0], dtype=np.int32)) - e2p_box_target_counts_nonchild = actx.from_numpy( - np.array([0, 0, 0, 0], dtype=np.int32)) - e2p_box_target_counts_nonchild[source_box_nr] = fp.points.shape[-1] - - pot, = e2p( - actx, - src_expansions=mpoles, - src_base_ibox=0, - target_boxes=e2p_target_boxes, - box_target_starts=e2p_box_target_starts, - box_target_counts_nonchild=e2p_box_target_counts_nonchild, - centers=centers, - targets=targets, - rscale=rscale, - **extra_kwargs - ) - pot = actx.to_numpy(pot) - - return pot - - m2l_factory = NonFFTM2LTranslationClassFactory() + if use_fft: + m2l_factory = FFTM2LTranslationClassFactory() + else: + m2l_factory = NonFFTM2LTranslationClassFactory() m2l_translation = m2l_factory.get_m2l_translation_class(knl, local_expn_class)() toy_ctx = t.ToyContext( - actx.context, kernel=knl, local_expn_class=partial(local_expn_class, m2l_translation=m2l_translation), @@ -599,7 +571,7 @@ def eval_at(e2p, source_box_nr, rscale): ) p = t.PointSources(toy_ctx, sources, weights=strengths) - p2p = p.eval(targets) + p2p = p.eval(actx, targets) m1_rscale = 0.5 m2_rscale = 0.25 @@ -607,155 +579,34 @@ def eval_at(e2p, source_box_nr, 
rscale): l2_rscale = 0.25 for order in orders: - m_expn = mpole_expn_class(knl, order=order) - l_expn = local_expn_class(knl, order=order, m2l_translation=m2l_translation) - - from sumpy import P2EFromSingleBox, E2PFromSingleBox, P2P, E2EFromCSR - p2m = P2EFromSingleBox(m_expn) - m2m = E2EFromCSR(m_expn, m_expn) - m2p = E2PFromSingleBox(m_expn, target_kernels) - m2l = E2EFromCSR(m_expn, l_expn) - l2l = E2EFromCSR(l_expn, l_expn) - l2p = E2PFromSingleBox(l_expn, target_kernels) - p2p = P2P(target_kernels, exclude_self=False) - - targets = actx.from_numpy(make_obj_array(fp.points)) - - # {{{ compute (direct) reference solution - - pot_direct, = p2p( - actx, - targets, sources, (strengths,), - **extra_kwargs) - pot_direct = actx.to_numpy(pot_direct) - - # }}} - - m1_rscale = 0.5 - m2_rscale = 0.25 - l1_rscale = 0.5 - l2_rscale = 0.25 - - # {{{ apply P2M - - p2m_source_boxes = actx.from_numpy(np.array([0], dtype=np.int32)) - - # These are indexed by global box numbers. - p2m_box_source_starts = actx.from_numpy( - np.array([0, 0, 0, 0], dtype=np.int32)) - p2m_box_source_counts_nonchild = actx.from_numpy( - np.array([nsources, 0, 0, 0], dtype=np.int32)) - - mpoles = p2m(actx, - source_boxes=p2m_source_boxes, - box_source_starts=p2m_box_source_starts, - box_source_counts_nonchild=p2m_box_source_counts_nonchild, - centers=centers, - sources=sources, - strengths=(strengths,), - nboxes=nboxes, - rscale=m1_rscale, - tgt_base_ibox=0, - **extra_kwargs) - - # }}} - - pot = eval_at(m2p, 0, m1_rscale) - - err = la.norm((pot - pot_direct) / res**2) - err = err / (la.norm(pot_direct) / res**2) + logger.info("Centers: %s", centers[:, 0].shape) + p2m = t.multipole_expand(actx, p, centers[:, 0], + order=order, rscale=m1_rscale) + p2m2p = p2m.eval(actx, targets) + err = la.norm((p2m2p - p2p) / res**2) + err = err / (la.norm(p2p) / res**2) pconv_verifier_p2m2p.add_data_point(order, err) - # {{{ apply M2M - - m2m_target_boxes = actx.from_numpy(np.array([1], dtype=np.int32)) - 
m2m_src_box_starts = actx.from_numpy(np.array([0, 1], dtype=np.int32)) - m2m_src_box_lists = actx.from_numpy(np.array([0], dtype=np.int32)) - - mpoles = m2m(actx, - src_expansions=mpoles, - src_base_ibox=0, - tgt_base_ibox=0, - ntgt_level_boxes=mpoles.shape[0], - - target_boxes=m2m_target_boxes, - - src_box_starts=m2m_src_box_starts, - src_box_lists=m2m_src_box_lists, - centers=centers, - - src_rscale=m1_rscale, - tgt_rscale=m2_rscale, - **extra_kwargs) - - # }}} - - pot = eval_at(m2p, 1, m2_rscale) - - err = la.norm((pot - pot_direct)/res**2) - err = err / (la.norm(pot_direct) / res**2) - + p2m2m = t.multipole_expand(actx, p2m, centers[:, 1], + order=order, rscale=m2_rscale) + p2m2m2p = p2m2m.eval(actx, targets) + err = la.norm((p2m2m2p - p2p)/res**2) + err = err / (la.norm(p2p) / res**2) pconv_verifier_p2m2m2p.add_data_point(order, err) - # {{{ apply M2L - - m2l_target_boxes = actx.from_numpy(np.array([2], dtype=np.int32)) - m2l_src_box_starts = actx.from_numpy(np.array([0, 1], dtype=np.int32)) - m2l_src_box_lists = actx.from_numpy(np.array([1], dtype=np.int32)) - - mpoles = m2l(actx, - src_expansions=mpoles, - src_base_ibox=0, - tgt_base_ibox=0, - ntgt_level_boxes=mpoles.shape[0], - - target_boxes=m2l_target_boxes, - src_box_starts=m2l_src_box_starts, - src_box_lists=m2l_src_box_lists, - centers=centers, - - src_rscale=m2_rscale, - tgt_rscale=l1_rscale, - **extra_kwargs) - - # }}} - - pot = eval_at(l2p, 2, l1_rscale) - - err = la.norm((pot - pot_direct)/res**2) - err = err / (la.norm(pot_direct) / res**2) - + p2m2m2l = t.local_expand(actx, p2m2m, centers[:, 2], + order=order, rscale=l1_rscale) + p2m2m2l2p = p2m2m2l.eval(actx, targets) + err = la.norm((p2m2m2l2p - p2p)/res**2) + err = err / (la.norm(p2p) / res**2) pconv_verifier_p2m2m2l2p.add_data_point(order, err) - # {{{ apply L2L - - l2l_target_boxes = actx.from_numpy(np.array([3], dtype=np.int32)) - l2l_src_box_starts = actx.from_numpy(np.array([0, 1], dtype=np.int32)) - l2l_src_box_lists = 
actx.from_numpy(np.array([2], dtype=np.int32)) - - mpoles = l2l(actx, - src_expansions=mpoles, - src_base_ibox=0, - tgt_base_ibox=0, - ntgt_level_boxes=mpoles.shape[0], - - target_boxes=l2l_target_boxes, - src_box_starts=l2l_src_box_starts, - src_box_lists=l2l_src_box_lists, - centers=centers, - - src_rscale=l1_rscale, - tgt_rscale=l2_rscale, - **extra_kwargs) - - # }}} - - pot = eval_at(l2p, 3, l2_rscale) - - err = la.norm((pot - pot_direct)/res**2) - err = err / (la.norm(pot_direct) / res**2) - + p2m2m2l2l = t.local_expand(actx, p2m2m2l, centers[:, 3], + order=order, rscale=l2_rscale) + p2m2m2l2l2p = p2m2m2l2l.eval(actx, targets) + err = la.norm((p2m2m2l2l2p - p2p)/res**2) + err = err / (la.norm(p2p) / res**2) pconv_verifier_full.add_data_point(order, err) for name, verifier in [ From 32173e000efef4a83acdd966d22ca519b85fb3fd Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 24 Feb 2025 15:54:43 +0200 Subject: [PATCH 48/59] fix some type annotations --- sumpy/__init__.py | 13 ++++++++---- sumpy/fmm.py | 3 ++- sumpy/p2p.py | 50 ++++++++++++++++++++++++++--------------------- sumpy/qbx.py | 12 ++++++------ sumpy/tools.py | 4 ++-- sumpy/toys.py | 17 ++++++---------- 6 files changed, 53 insertions(+), 46 deletions(-) diff --git a/sumpy/__init__.py b/sumpy/__init__.py index e09bd6ff4..3acd93aa1 100644 --- a/sumpy/__init__.py +++ b/sumpy/__init__.py @@ -24,6 +24,7 @@ """ import os +from typing import TYPE_CHECKING from pytools.persistent_dict import WriteOncePersistentDict @@ -42,6 +43,12 @@ from sumpy.version import VERSION_TEXT +if TYPE_CHECKING: + from collections.abc import Hashable + + import loopy as lp + + __all__ = [ "P2P", "E2EFromCSR", @@ -59,8 +66,8 @@ ] -code_cache = WriteOncePersistentDict( - f"sumpy-code-cache-v6-{VERSION_TEXT}", safe_sync=False) +code_cache: WriteOncePersistentDict[Hashable, lp.TranslationUnit] = ( + WriteOncePersistentDict(f"sumpy-code-cache-v8-{VERSION_TEXT}", safe_sync=False)) # {{{ optimization control @@ -80,8 +87,6 @@ def 
set_optimization_enabled(flag): # {{{ cache control -CACHING_ENABLED = True - CACHING_ENABLED = ( "SUMPY_NO_CACHE" not in os.environ and "CG_NO_CACHE" not in os.environ) diff --git a/sumpy/fmm.py b/sumpy/fmm.py index e9c93b8a0..79bfa295f 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -76,6 +76,7 @@ from boxtree.traversal import FMMTraversalInfo from numpy.typing import DTypeLike + import pyopencl from arraycontext import Array from sumpy.array_context import PyOpenCLArrayContext @@ -811,7 +812,7 @@ def multipole_to_local(self, local_exps_view_func = self.local_expansions_view for lev in range(self.tree.nlevels): - wait_for = [] + wait_for: list[pyopencl.Event] = [] start, stop = level_start_target_box_nrs[lev:lev+2] if start == stop: diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e7f7712cf..f5c707e03 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -191,7 +191,10 @@ def get_default_src_tgt_arguments(self): if self.exclude_self else []) + gather_loopy_source_arguments(self.source_kernels)) - def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): + def get_optimized_kernel(self, *, + targets_is_obj_array: bool = False, + sources_is_obj_array: bool = False, + **kwargs: Any) -> lp.TranslationUnit: # FIXME knl = self.get_kernel() @@ -202,10 +205,8 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): knl = lp.split_iname(knl, "itgt", 1024, outer_tag="g.0") knl = self._allow_redundant_execution_of_knl_scaling(knl) - knl = lp.set_options(knl, - enforce_variable_access_ordered="no_check") + knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") - knl = register_optimization_preambles(knl, self.device) return knl # }}} @@ -501,9 +502,11 @@ def default_name(self): return "p2p_from_csr" @override - def get_kernel(self, - max_nsources_in_one_box: int, max_ntargets_in_one_box: int, *, - work_items_per_group: int = 32, is_gpu: bool = False): + def get_kernel(self, *, + max_nsources_in_one_box: int = 32, + 
max_ntargets_in_one_box: int = 32, + work_items_per_group: int = 32, + is_gpu: bool = False, **kwargs: Any) -> lp.TranslationUnit: loopy_insns, _result_names = self.get_loopy_insns_and_result_names() arguments = [ *self.get_default_src_tgt_arguments(), @@ -700,8 +703,10 @@ def get_kernel(self, "noutputs": len(self.target_kernels)}, ) - loopy_knl = lp.add_dtypes( - loopy_knl, {"nsources": np.int32, "ntargets": np.int32}) + loopy_knl = lp.add_dtypes(loopy_knl, { + "nsources": np.dtype(np.int32), + "ntargets": np.dtype(np.int32), + }) loopy_knl = lp.tag_inames(loopy_knl, "idim*:unr") loopy_knl = lp.tag_inames(loopy_knl, "istrength*:unr") @@ -714,20 +719,24 @@ def get_kernel(self, return loopy_knl @override - def get_optimized_kernel(self, + def get_optimized_kernel(self, *, max_nsources_in_one_box: int = 32, max_ntargets_in_one_box: int = 32, strength_dtype: np.dtype[Any] | None = None, source_dtype: np.dtype[Any] | None = None, local_mem_size: int = 32, is_gpu: bool = False, **kwargs) -> lp.TranslationUnit: - if not is_gpu: - knl = self.get_kernel(max_nsources_in_one_box, - max_ntargets_in_one_box, is_gpu=is_gpu) + knl = self.get_kernel( + max_nsources_in_one_box=max_nsources_in_one_box, + max_ntargets_in_one_box=max_ntargets_in_one_box, + is_gpu=is_gpu) knl = lp.split_iname(knl, "itgt_box", 4, outer_tag="g.0") knl = self._allow_redundant_execution_of_knl_scaling(knl) else: + assert strength_dtype is not None + assert source_dtype is not None + dtype_size = np.dtype(strength_dtype).alignment work_items_per_group = min(256, max_ntargets_in_one_box) total_local_mem = max_nsources_in_one_box * \ @@ -736,8 +745,9 @@ def get_optimized_kernel(self, # can be scheduled at the same time for latency hiding nprefetch = (2 * total_local_mem - 1) // local_mem_size + 1 - knl = self.get_kernel(max_nsources_in_one_box, - max_ntargets_in_one_box, + knl = self.get_kernel( + max_nsources_in_one_box=max_nsources_in_one_box, + max_ntargets_in_one_box=max_ntargets_in_one_box, 
work_items_per_group=work_items_per_group, is_gpu=is_gpu) knl = lp.tag_inames(knl, {"itgt_box": "g.0", "inner": "l.0"}) @@ -799,12 +809,8 @@ def get_optimized_kernel(self, knl = lp.add_inames_to_insn(knl, "inner", "id:init_* or id:*_scaling or id:src_box_insn_*") knl = lp.add_inames_to_insn(knl, "itgt_box", "id:*_scaling") - # knl = lp.set_options(knl, write_code=True) - knl = lp.set_options(knl, - enforce_variable_access_ordered="no_check") - - knl = register_optimization_preambles(knl, self.device) + knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") return knl def __call__(self, @@ -820,7 +826,8 @@ def __call__(self, is_gpu = not is_cl_cpu(actx) if is_gpu: - dtype_size = kwargs.get("sources")[0].dtype.alignment + source_dtype = kwargs["sources"][0].dtype + strength_dtype = kwargs["strength"].dtype else: # these are unused for not GPU and defeats the caching # set them to None to keep the caching across dtypes @@ -830,7 +837,6 @@ def __call__(self, knl = self.get_cached_kernel_executor( max_nsources_in_one_box=max_nsources_in_one_box, max_ntargets_in_one_box=max_ntargets_in_one_box, - dtype_size=dtype_size, local_mem_size=actx.queue.device.local_mem_size, is_gpu=is_gpu, source_dtype=source_dtype, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 1fe391494..c542cbbe7 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -28,7 +28,7 @@ import logging from abc import ABC -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np from typing_extensions import override @@ -209,14 +209,14 @@ def get_default_src_tgt_arguments(self): ] def get_optimized_kernel(self, *, - is_cpu: bool, - targets_is_obj_array: bool, - sources_is_obj_array: bool, - centers_is_obj_array: bool, + is_cpu: bool = True, + targets_is_obj_array: bool = False, + sources_is_obj_array: bool = False, + centers_is_obj_array: bool = False, # Used by pytential to override the name of the loop to be # parallelized. 
In the case of QBX, that's the loop over QBX # targets (not global targets). - itgt_name: str = "itgt"): + itgt_name: str = "itgt", **kwargs: Any) -> lp.TranslationUnit: # FIXME specialize/tune for GPU/CPU loopy_knl = self.get_kernel() diff --git a/sumpy/tools.py b/sumpy/tools.py index bfabecc48..449b647f9 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -448,11 +448,11 @@ def get_cache_key(self) -> tuple[Hashable, ...]: ... @abstractmethod - def get_kernel(self) -> lp.TranslationUnit: + def get_kernel(self, **kwargs: Any) -> lp.TranslationUnit: ... @abstractmethod - def get_optimized_kernel(self) -> lp.TranslationUnit: + def get_optimized_kernel(self, **kwargs: Any) -> lp.TranslationUnit: ... @memoize_method diff --git a/sumpy/toys.py b/sumpy/toys.py index 4a6bc408e..7e8eefda0 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -527,8 +527,7 @@ def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: def __neg__(self) -> PotentialSource: return -1*self - def __add__(self, other: Number_ish | PotentialSource - ) -> PotentialSource: + def __add__(self, other: Number_ish | PotentialSource) -> PotentialSource: if isinstance(other, Number | np.number): other = ConstantPotential(self.toy_ctx, other) elif not isinstance(other, PotentialSource): @@ -538,19 +537,15 @@ def __add__(self, other: Number_ish | PotentialSource __radd__ = __add__ - def __sub__(self, - other: Number_ish | PotentialSource) -> PotentialSource: + def __sub__(self, other: Number_ish | PotentialSource) -> PotentialSource: return self.__add__(-other) - # FIXME: mypy says " Forward operator "__sub__" is not callable" - # I don't know what it means. 
-AK, 2024-07-18 def __rsub__(self, # type:ignore[misc] other: Number_ish | PotentialSource - ) -> PotentialSource: + ) -> PotentialSource: return (-self).__add__(other) - def __mul__(self, - other: Number_ish | PotentialSource) -> PotentialSource: + def __mul__(self, other: Number_ish | PotentialSource) -> PotentialSource: if isinstance(other, Number | np.number): other = ConstantPotential(self.toy_ctx, other) elif not isinstance(other, PotentialSource): @@ -742,9 +737,9 @@ class Sum(PotentialExpressionNode): @override def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: - result = 0 + result = np.zeros(targets.shape[1]) for psource in self.psources: - result = result + psource.eval(actx, targets) + result += psource.eval(actx, targets) return result From 28cf7cd858db8ca71ba359d822f93bc7d371c04b Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Mon, 24 Feb 2025 16:00:43 +0200 Subject: [PATCH 49/59] fix some pylint errors --- sumpy/e2e.py | 1 - sumpy/tools.py | 1 - 2 files changed, 2 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 8e01a5356..cbfc3ddcd 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -51,7 +51,6 @@ .. autoclass:: E2EFromCSR .. autoclass:: E2EFromParent .. 
autoclass:: E2EFromChildren - """ diff --git a/sumpy/tools.py b/sumpy/tools.py index 449b647f9..9e484bc99 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -440,7 +440,6 @@ def __eq__(self, other): class KernelCacheMixin(ABC): - context: cl.Context name: str @abstractmethod From de738c4259f9ad83822da70bf697d3e827855d31 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 4 Dec 2025 15:07:14 -0600 Subject: [PATCH 50/59] Drop register_optimization_preambles, handle FP contract in actx --- sumpy/array_context.py | 23 ++++++++++++++++++++++- sumpy/codegen.py | 22 ---------------------- sumpy/e2e.py | 7 ------- sumpy/e2p.py | 3 --- sumpy/p2e.py | 2 -- sumpy/p2p.py | 13 ------------- sumpy/tools.py | 1 - 7 files changed, 22 insertions(+), 49 deletions(-) diff --git a/sumpy/array_context.py b/sumpy/array_context.py index de5d8074c..2e3091516 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -26,7 +26,9 @@ from typing import TYPE_CHECKING, Any from boxtree.array_context import PyOpenCLArrayContext as PyOpenCLArrayContextBase +from typing_extensions import override +import loopy as lp from arraycontext.pytest import ( _PytestPyOpenCLArrayContextFactoryWithClass, register_pytest_array_context_factory, @@ -34,8 +36,12 @@ if TYPE_CHECKING: + from collections.abc import Iterator + from numpy.typing import DTypeLike + from loopy import TranslationUnit + from loopy.codegen import PreambleInfo from pytools.tag import ToTagSetConvertible @@ -86,8 +92,23 @@ def make_loopy_program( tags=tags) +def _fp_contract_fast_preamble( + preamble_info: PreambleInfo + ) -> Iterator[tuple[str, str]]: + yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)") + + class PyOpenCLArrayContext(PyOpenCLArrayContextBase): - def transform_loopy_program(self, t_unit): + @override + def transform_loopy_program(self, t_unit: TranslationUnit): + import pyopencl as cl + device = self.queue.device + if (device.platform.name == "Portable Computing Language" + and 
(device.type & cl.device_type.GPU)): + t_unit = lp.register_preamble_generators( + t_unit, + [_fp_contract_fast_preamble]) + return t_unit diff --git a/sumpy/codegen.py b/sumpy/codegen.py index e3e071a62..517c0ad84 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -47,8 +47,6 @@ from numpy.typing import DTypeLike - import pyopencl as cl - from loopy.codegen import PreambleInfo from loopy.target import TargetBase from loopy.translation_unit import CallablesInferenceContext from loopy.types import LoopyType @@ -249,26 +247,6 @@ def register_bessel_callables(loopy_knl: lp.TranslationUnit) -> lp.TranslationUn return loopy_knl - -def _fp_contract_fast_preamble( - preamble_info: PreambleInfo - ) -> Iterator[tuple[str, str]]: - yield ("fp_contract_fast_pocl", "#pragma clang fp contract(fast)") - - -def register_optimization_preambles( - loopy_knl: lp.TranslationUnit, device: cl.Device - ) -> lp.TranslationUnit: - if isinstance(loopy_knl.target, lp.PyOpenCLTarget): - import pyopencl as cl - if (device.platform.name == "Portable Computing Language" - and (device.type & cl.device_type.GPU)): - loopy_knl = lp.register_preamble_generators( - loopy_knl, - [_fp_contract_fast_preamble]) - - return loopy_knl - # }}} diff --git a/sumpy/e2e.py b/sumpy/e2e.py index cbfc3ddcd..20847682f 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -35,7 +35,6 @@ import sumpy.symbolic as sym from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program -from sumpy.codegen import register_optimization_preambles from sumpy.tools import KernelCacheMixin, to_complex_dtype @@ -138,7 +137,6 @@ def get_optimized_kernel(self): # FIXME knl = self.get_kernel() knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0") - knl = register_optimization_preambles(knl, self.device) return knl @@ -266,7 +264,6 @@ def get_optimized_kernel(self): # FIXME knl = self.get_kernel() knl = lp.split_iname(knl, "itgt_box", 64, outer_tag="g.0", inner_tag="l.0") - knl = 
register_optimization_preambles(knl, self.device) return knl @@ -511,7 +508,6 @@ def get_optimized_kernel(self, result_dtype): knl = self.get_kernel(result_dtype) knl = self.tgt_expansion.m2l_translation.optimize_loopy_kernel( knl, self.tgt_expansion, self.src_expansion) - knl = register_optimization_preambles(knl, self.device) return knl @@ -625,7 +621,6 @@ def get_optimized_kernel(self, result_dtype): knl = self.get_kernel(result_dtype) knl = lp.tag_inames(knl, "idim*:unr") knl = lp.tag_inames(knl, {"itr_class": "g.0"}) - knl = register_optimization_preambles(knl, self.device) return knl @@ -732,7 +727,6 @@ def get_optimized_kernel(self, result_dtype): _, optimizations = self.get_inner_knl_and_optimizations(result_dtype) for optimization in optimizations: knl = optimization(knl) - knl = register_optimization_preambles(knl, self.device) return knl def __call__(self, actx: PyOpenCLArrayContext, **kwargs): @@ -834,7 +828,6 @@ def get_optimized_kernel(self, result_dtype): for optimization in optimizations: knl = optimization(knl) knl = lp.add_inames_for_unused_hw_axes(knl) - knl = register_optimization_preambles(knl, self.device) return knl def __call__(self, actx: PyOpenCLArrayContext, **kwargs): diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 538e2024d..2602b87fc 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -32,7 +32,6 @@ from pytools.obj_array import make_obj_array from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program -from sumpy.codegen import register_optimization_preambles from sumpy.tools import KernelCacheMixin, gather_loopy_arguments @@ -201,7 +200,6 @@ def get_optimized_kernel(self): knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling") knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") - knl = register_optimization_preambles(knl, self.device) return knl @@ -330,7 +328,6 @@ def get_optimized_kernel(self): knl = lp.add_inames_to_insn(knl, "itgt_box", "id:kernel_scaling") knl = lp.set_options(knl, 
enforce_variable_access_ordered="no_check") - knl = register_optimization_preambles(knl, self.device) return knl diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 77cfd3c36..f50a8197f 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -30,7 +30,6 @@ import loopy as lp from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program -from sumpy.codegen import register_optimization_preambles from sumpy.tools import KernelCacheMixin, KernelComputation @@ -122,7 +121,6 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): knl = self._allow_redundant_execution_of_knl_scaling(knl) knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") - knl = register_optimization_preambles(knl, self.device) return knl def __call__(self, actx: PyOpenCLArrayContext, **kwargs): diff --git a/sumpy/p2p.py b/sumpy/p2p.py index f5c707e03..6741b1ac1 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -36,7 +36,6 @@ import pytools.obj_array as obj_array from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program -from sumpy.codegen import register_optimization_preambles from sumpy.tools import KernelCacheMixin, KernelComputation, is_obj_array_like @@ -277,9 +276,6 @@ def __call__(self, targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - from sumpy.codegen import register_optimization_preambles - knl = register_optimization_preambles(knl, actx.queue.device) - result = actx.call_loopy( knl, sources=sources, @@ -353,9 +349,6 @@ def __call__(self, targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - from sumpy.codegen import register_optimization_preambles - knl = register_optimization_preambles(knl, actx.queue.device) - result = actx.call_loopy(knl, sources=sources, targets=targets, **kwargs) return obj_array.new_1d([result[f"result_{i}"] for i in range(self.nresults)]) @@ -478,9 +471,6 @@ def __call__(self, 
targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) - from sumpy.codegen import register_optimization_preambles - knl = register_optimization_preambles(knl, actx.queue.device) - result = actx.call_loopy( knl, targets=targets, @@ -843,9 +833,6 @@ def __call__(self, strength_dtype=strength_dtype, ) - from sumpy.codegen import register_optimization_preambles - knl = register_optimization_preambles(knl, actx.queue.device) - result = actx.call_loopy(knl, targets=targets, sources=sources, **kwargs) return obj_array.new_1d([result[f"result_s{i}"] for i in range(self.nresults)]) diff --git a/sumpy/tools.py b/sumpy/tools.py index 9e484bc99..aef26b4f6 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -53,7 +53,6 @@ from optype.numpy import Array2D import pyopencl - import pyopencl as cl from pymbolic.primitives import Variable from pymbolic.typing import Expression From 494220f51dea938895214fe06a35391501974022 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 4 Dec 2025 15:08:12 -0600 Subject: [PATCH 51/59] Fix some make_obj_array stragglers --- sumpy/e2p.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 2602b87fc..dd4e555d9 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -28,8 +28,8 @@ import numpy as np import loopy as lp +import pytools.obj_array as obj_array from loopy.version import MOST_RECENT_LANGUAGE_VERSION # noqa: F401 -from pytools.obj_array import make_obj_array from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program from sumpy.tools import KernelCacheMixin, gather_loopy_arguments @@ -223,7 +223,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): knl, centers=centers, rscale=rscale, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) + return obj_array.new_1d([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} @@ -344,7 +344,7 @@ def __call__(self, actx: 
PyOpenCLArrayContext, **kwargs): rscale=rscale, **kwargs) - return make_obj_array([result[f"result_s{i}"] for i in range(self.nresults)]) + return obj_array.new_1d([result[f"result_s{i}"] for i in range(self.nresults)]) # }}} From 57dc042cd416e6b15cce163ff465e229313688e7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 4 Dec 2025 15:08:54 -0600 Subject: [PATCH 52/59] get_cached_kernel_executor -> get_cached_kernel --- sumpy/e2e.py | 14 +++++++------- sumpy/e2p.py | 4 ++-- sumpy/p2e.py | 2 +- sumpy/p2p.py | 9 ++++----- sumpy/qbx.py | 6 +++--- sumpy/tools.py | 6 +++--- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 20847682f..2f9df0553 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -282,7 +282,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) - knl = self.get_cached_kernel_executor() + knl = self.get_cached_kernel() result = actx.call_loopy( knl, centers=centers, @@ -527,7 +527,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) src_expansions = kwargs.pop("src_expansions") - knl = self.get_cached_kernel_executor(result_dtype=src_expansions.dtype) + knl = self.get_cached_kernel(result_dtype=src_expansions.dtype) result = actx.call_loopy( knl, src_expansions=src_expansions, @@ -641,7 +641,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): "m2l_translation_classes_dependent_data") result_dtype = m2l_translation_classes_dependent_data.dtype - knl = self.get_cached_kernel_executor(result_dtype=result_dtype) + knl = self.get_cached_kernel(result_dtype=result_dtype) result = actx.call_loopy( knl, src_rscale=src_rscale, @@ -737,7 +737,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): preprocessed_src_expansions = kwargs.pop("preprocessed_src_expansions") result_dtype = 
preprocessed_src_expansions.dtype - knl = self.get_cached_kernel_executor(result_dtype=result_dtype) + knl = self.get_cached_kernel(result_dtype=result_dtype) result = actx.call_loopy( knl, preprocessed_src_expansions=preprocessed_src_expansions, @@ -838,7 +838,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): tgt_expansions = kwargs.pop("tgt_expansions") result_dtype = tgt_expansions.dtype - knl = self.get_cached_kernel_executor(result_dtype=result_dtype) + knl = self.get_cached_kernel(result_dtype=result_dtype) result = actx.call_loopy( knl, tgt_expansions=tgt_expansions, @@ -958,7 +958,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) - knl = self.get_cached_kernel_executor() + knl = self.get_cached_kernel() result = actx.call_loopy( knl, centers=centers, @@ -1065,7 +1065,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): src_rscale = centers.dtype.type(kwargs.pop("src_rscale")) tgt_rscale = centers.dtype.type(kwargs.pop("tgt_rscale")) - knl = self.get_cached_kernel_executor() + knl = self.get_cached_kernel() result = actx.call_loopy( knl, centers=centers, diff --git a/sumpy/e2p.py b/sumpy/e2p.py index dd4e555d9..e0106b296 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -218,7 +218,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # meaningfully inferred. Make the type of rscale explicit. rscale = centers.dtype.type(kwargs.pop("rscale")) - knl = self.get_cached_kernel_executor() + knl = self.get_cached_kernel() result = actx.call_loopy( knl, centers=centers, rscale=rscale, **kwargs) @@ -337,7 +337,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): # meaningfully inferred. Make the type of rscale explicit. 
rscale = centers.dtype.type(kwargs.pop("rscale")) - knl = self.get_cached_kernel_executor() + knl = self.get_cached_kernel() result = actx.call_loopy( knl, centers=centers, diff --git a/sumpy/p2e.py b/sumpy/p2e.py index f50a8197f..10eeb8343 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -133,7 +133,7 @@ def __call__(self, actx: PyOpenCLArrayContext, **kwargs): dtype = centers[0].dtype if is_obj_array_like(centers) else centers.dtype rscale = dtype.type(kwargs.pop("rscale")) - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 6741b1ac1..acd3a6f58 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -272,7 +272,7 @@ def __call__(self, strength: Sequence[Array], **kwargs: Any, ) -> tuple[cl.Event, Sequence[Array]]: - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) @@ -345,7 +345,7 @@ def __call__(self, sources: ObjectArray1D[Array] | Array, **kwargs: Any, ) -> Sequence[Array]: - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) @@ -439,7 +439,6 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): knl = self._allow_redundant_execution_of_knl_scaling(knl) knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") - knl = register_optimization_preambles(knl, self.device) return knl @@ -467,7 +466,7 @@ def __call__(self, :returns: a one-dimensional array of interactions, for each index pair in (*srcindices*, *tgtindices*) """ - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) @@ -824,7 +823,7 @@ def __call__(self, 
source_dtype = None strength_dtype = None - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( max_nsources_in_one_box=max_nsources_in_one_box, max_ntargets_in_one_box=max_ntargets_in_one_box, local_mem_size=actx.queue.device.local_mem_size, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index c542cbbe7..ad7088451 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -317,7 +317,7 @@ def __call__(self, actx: PyOpenCLArrayContext, already multiplied in. """ - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( is_cpu=is_cl_cpu(actx), targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), @@ -396,7 +396,7 @@ def get_kernel(self): def __call__(self, actx: PyOpenCLArrayContext, targets, sources, centers, expansion_radii, **kwargs): - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( is_cpu=is_cl_cpu(actx), targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), @@ -525,7 +525,7 @@ def __call__(self, actx: PyOpenCLArrayContext, in (*srcindices*, *tgtindices*) """ - knl = self.get_cached_kernel_executor( + knl = self.get_cached_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources), centers_is_obj_array=is_obj_array_like(centers)) diff --git a/sumpy/tools.py b/sumpy/tools.py index aef26b4f6..fb9c70690 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -454,7 +454,7 @@ def get_optimized_kernel(self, **kwargs: Any) -> lp.TranslationUnit: ... 
@memoize_method - def get_cached_kernel_executor(self, **kwargs) -> lp.ExecutorBase: + def get_cached_kernel(self, **kwargs) -> lp.TranslationUnit: from sumpy import CACHING_ENABLED, NO_CACHE_KERNELS, OPT_ENABLED, code_cache if CACHING_ENABLED and not ( @@ -472,7 +472,7 @@ def get_cached_kernel_executor(self, **kwargs) -> lp.ExecutorBase: try: result = code_cache[cache_key] logger.debug("%s: kernel cache hit [key=%s]", self.name, cache_key) - return result.executor(self.context) + return result except KeyError: pass @@ -493,7 +493,7 @@ def get_cached_kernel_executor(self, **kwargs) -> lp.ExecutorBase: NO_CACHE_KERNELS and self.name in NO_CACHE_KERNELS): code_cache.store_if_not_present(cache_key, knl) - return knl.executor(self.context) + return knl @staticmethod def _allow_redundant_execution_of_knl_scaling( From a57eb873c70c85c21860079a33347861879fc36b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 9 Dec 2025 14:20:15 -0600 Subject: [PATCH 53/59] Fix toys --- sumpy/toys.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sumpy/toys.py b/sumpy/toys.py index 7e8eefda0..1eb563608 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -289,7 +289,7 @@ def _p2e(actx, psource, center, rscale, order: int, p2e, expn_class, expn_kwargs centers = actx.from_numpy( np.array(center, dtype=np.float64).reshape(toy_ctx.kernel.dim, 1)) - coeffs, = p2e( + coeffs = p2e( actx, source_boxes=source_boxes, box_source_starts=box_source_starts, @@ -379,7 +379,7 @@ def _e2e(actx: PyOpenCLArrayContext, **toy_ctx.extra_kernel_kwargs, } - to_coeffs, = e2e(**args) + to_coeffs = e2e(**args) return expn_class( toy_ctx, to_center, to_rscale, to_order, actx.to_numpy(to_coeffs[1]), @@ -414,12 +414,12 @@ def _m2l(actx: PyOpenCLArrayContext, if toy_ctx.use_fft: - fft_app = get_opencl_fft_app(actx, (expn_size,), + fft_app = get_opencl_fft_app(actx, (1, expn_size,), dtype=preprocessed_src_expansions.dtype, inverse=False) - ifft_app = get_opencl_fft_app(actx, 
(expn_size,), + ifft_app = get_opencl_fft_app(actx, (1, expn_size,), dtype=preprocessed_src_expansions.dtype, inverse=True) - preprocessed_src_expansions = run_opencl_fft(actx, fft_app, + _evt, preprocessed_src_expansions = run_opencl_fft(actx, fft_app, preprocessed_src_expansions, inverse=False) # Compute translation classes data @@ -443,7 +443,7 @@ def _m2l(actx: PyOpenCLArrayContext, **toy_ctx.extra_kernel_kwargs) if toy_ctx.use_fft: - m2l_translation_classes_dependent_data = run_opencl_fft( + _evt, m2l_translation_classes_dependent_data = run_opencl_fft( actx, fft_app, m2l_translation_classes_dependent_data, inverse=False) @@ -739,7 +739,7 @@ class Sum(PotentialExpressionNode): def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: result = np.zeros(targets.shape[1]) for psource in self.psources: - result += psource.eval(actx, targets) + result = result + psource.eval(actx, targets) return result @@ -753,7 +753,7 @@ class Product(PotentialExpressionNode): def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: result = np.ones(targets.shape[1]) for psource in self.psources: - result *= psource.eval(actx, targets) + result = result * psource.eval(actx, targets) return result # }}} From 53b6fc31e5b23cb790d831021717cc931ec78005 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 9 Dec 2025 14:30:33 -0600 Subject: [PATCH 54/59] Fix jump and target derivative tests --- sumpy/test/test_kernels.py | 4 ++-- sumpy/test/test_target_deriv.py | 27 +++++++++++++-------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/sumpy/test/test_kernels.py b/sumpy/test/test_kernels.py index 6f5f8538e..2167a56a0 100644 --- a/sumpy/test/test_kernels.py +++ b/sumpy/test/test_kernels.py @@ -902,13 +902,13 @@ def test_jump( dlp_knl = DirectionalSourceDerivative(kernel) from sumpy.qbx import LayerPotential - lpot = LayerPotential(actx.context, + lpot = LayerPotential( expansion=LineTaylorLocalExpansion(kernel, 
order=order), source_kernels=(dlp_knl,), target_kernels=(kernel,), value_dtypes=np.complex128,) - _evt, (y,) = lpot(actx.queue, + y, = lpot(actx, actx.from_numpy(targets), actx.from_numpy(geo.nodes), actx.from_numpy(centers), diff --git a/sumpy/test/test_target_deriv.py b/sumpy/test/test_target_deriv.py index cff0b69d8..dc03c4318 100644 --- a/sumpy/test/test_target_deriv.py +++ b/sumpy/test/test_target_deriv.py @@ -75,13 +75,11 @@ def test_lpot_dx_jump_relation_convergence( from sumpy.qbx import LayerPotential expansion = LineTaylorLocalExpansion(knl, qbx_order) lplot_dx = LayerPotential( - actx.context, expansion=expansion, target_kernels=(AxisTargetDerivative(0, knl),), source_kernels=(knl,) ) lplot_dy = LayerPotential( - actx.context, expansion=expansion, target_kernels=(AxisTargetDerivative(1, knl),), source_kernels=(knl,) @@ -96,34 +94,35 @@ def test_lpot_dx_jump_relation_convergence( weights_nodes = actx.from_numpy(weights_nodes_h) expansion_radii_h = 4 * target_geo.area_elements / nsources + expansion_radii = actx.from_numpy(expansion_radii_h) centers_in = actx.from_numpy( targets_h - target_geo.normals * expansion_radii_h) centers_out = actx.from_numpy( targets_h + target_geo.normals * expansion_radii_h) strengths = (weights_nodes,) - _, (eval_in_dx,) = lplot_dx( - actx.queue, + (eval_in_dx,) = lplot_dx( + actx, targets, sources, centers_in, strengths, - expansion_radii=expansion_radii_h + expansion_radii=expansion_radii ) - _, (eval_in_dy,) = lplot_dy( - actx.queue, + (eval_in_dy,) = lplot_dy( + actx, targets, sources, centers_in, strengths, - expansion_radii=expansion_radii_h + expansion_radii=expansion_radii ) - _, (eval_out_dx,) = lplot_dx( - actx.queue, + (eval_out_dx,) = lplot_dx( + actx, targets, sources, centers_out, strengths, - expansion_radii=expansion_radii_h + expansion_radii=expansion_radii ) - _, (eval_out_dy,) = lplot_dy( - actx.queue, + (eval_out_dy,) = lplot_dy( + actx, targets, sources, centers_out, strengths, - 
expansion_radii=expansion_radii_h + expansion_radii=expansion_radii ) eval_in_dx = actx.to_numpy(eval_in_dx) From 43af511b6ff009bbe3abde8ea6eae1e8dcf7416b Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 9 Dec 2025 18:24:18 -0600 Subject: [PATCH 55/59] Fix test_unified_single_and_double --- sumpy/test/test_fmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sumpy/test/test_fmm.py b/sumpy/test/test_fmm.py index e143545e6..f761bdf67 100644 --- a/sumpy/test/test_fmm.py +++ b/sumpy/test/test_fmm.py @@ -432,7 +432,7 @@ def test_unified_single_and_double(actx_factory: ArrayContextFactory, visualize= source_kernel_vecs, strength_usages, strict=True): source_extra_kwargs = {} if deriv_knl in source_kernels: - source_extra_kwargs["dir_vec"] = actx.from_numpy(dir_vec) + source_extra_kwargs["dir_vec"] = dir_vec tree_indep = SumpyTreeIndependentDataForWrangler( actx, partial(mpole_expn_class, knl), From ee19cf82da354eb42c92c12384f663ad48c3ead4 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 9 Dec 2025 18:29:42 -0600 Subject: [PATCH 56/59] Address pocl 7.x pyvkfft miscompilation https://github.com/pocl/pocl/issues/2069 --- .test-conda-env-py3.yml | 3 ++- sumpy/test/test_fmm.py | 13 ++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 2249501e3..46ddbbee7 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -8,7 +8,8 @@ dependencies: - numpy - scipy - sympy -- pocl +# https://github.com/pocl/pocl/issues/2069 +- pocl<7 - pocl-cuda - islpy - pyopencl diff --git a/sumpy/test/test_fmm.py b/sumpy/test/test_fmm.py index f761bdf67..5f54e3273 100644 --- a/sumpy/test/test_fmm.py +++ b/sumpy/test/test_fmm.py @@ -34,7 +34,11 @@ import pytest import pytools.obj_array as obj_array -from arraycontext import ArrayContextFactory, pytest_generate_tests_for_array_contexts +from arraycontext import ( + ArrayContextFactory, + PyOpenCLArrayContext, + 
pytest_generate_tests_for_array_contexts, +) from sumpy.array_context import PytestPyOpenCLArrayContextFactory, _acf # noqa: F401 from sumpy.expansion.local import ( @@ -146,6 +150,13 @@ def _test_sumpy_fmm( actx = actx_factory() + if fft_backend == "pyvkfft": + from pyopencl.characterize import get_pocl_version + if (isinstance(actx, PyOpenCLArrayContext) + and get_pocl_version(actx.queue.device.platform) >= (7,)): + pytest.skip("pocl 7 and pyvkfft don't get along: " + "https://github.com/pocl/pocl/issues/2069") + nsources = 1000 ntargets = 300 dtype = np.float64 From 680aef7a6f2e1a2f486b01c39ea8ef50070f9a9c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 10 Dec 2025 13:40:19 -0600 Subject: [PATCH 57/59] Fix docs --- doc/conf.py | 2 ++ sumpy/tools.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 2e3b8ce20..f80009837 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -26,6 +26,7 @@ nitpick_ignore_regex = [ ["py:class", r"symengine\.(.+)"], # :cry: + ["py:class", r"ToTagSetConvertible"], # :cry: ] sphinxconfig_missing_reference_aliases = { @@ -36,6 +37,7 @@ "np.complexfloating": "class:numpy.complexfloating", "np.inexact": "class:numpy.inexact", "np.dtype": "class:numpy.dtype", + "np.number": "class:numpy.number", # pytools "obj_array.ObjectArray1D": "obj:pytools.obj_array.ObjectArray1D", # sympy diff --git a/sumpy/tools.py b/sumpy/tools.py index fb9c70690..8cae66789 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -69,8 +69,6 @@ .. autofunction:: to_complex_dtype .. autofunction:: is_obj_array_like -.. autofunction:: vector_to_device -.. autofunction:: vector_from_device .. 
autoclass:: OrderedSet Multi-index Helpers From 039119ee059eaf2ba1bf9b87cf259f1cf1ef252e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 10 Dec 2025 15:38:50 -0600 Subject: [PATCH 58/59] Use ArrayContext+assert instead of PyOpenCLActx in annotations --- doc/conf.py | 1 + sumpy/array_context.py | 6 +++++- sumpy/distributed.py | 10 +++++----- sumpy/e2e.py | 21 +++++++++++++-------- sumpy/e2p.py | 11 ++++++++--- sumpy/fmm.py | 35 +++++++++++++++++------------------ sumpy/p2e.py | 13 +++++++++---- sumpy/p2p.py | 25 +++++++++++++------------ sumpy/qbx.py | 10 ++++++---- sumpy/tools.py | 8 ++++++-- sumpy/toys.py | 35 +++++++++++++++++++---------------- 11 files changed, 102 insertions(+), 73 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index f80009837..21e6be08b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -55,6 +55,7 @@ "CallInstruction": "class:loopy.kernel.instruction.CallInstruction", # arraycontext "Array": "obj:arraycontext.Array", + "ArrayContext": "class:arraycontext.ArrayContext", # boxtree "FMMTraversalInfo": "class:boxtree.traversal.FMMTraversalInfo", # sumpy diff --git a/sumpy/array_context.py b/sumpy/array_context.py index 2e3091516..2341c1e5f 100644 --- a/sumpy/array_context.py +++ b/sumpy/array_context.py @@ -40,6 +40,7 @@ from numpy.typing import DTypeLike + from arraycontext import ArrayContext from loopy import TranslationUnit from loopy.codegen import PreambleInfo from pytools.tag import ToTagSetConvertible @@ -112,7 +113,10 @@ def transform_loopy_program(self, t_unit: TranslationUnit): return t_unit -def is_cl_cpu(actx: PyOpenCLArrayContext) -> bool: +def is_cl_cpu(actx: ArrayContext) -> bool: + if not isinstance(actx, PyOpenCLArrayContext): + return False + import pyopencl as cl return all(dev.type & cl.device_type.CPU for dev in actx.context.devices) diff --git a/sumpy/distributed.py b/sumpy/distributed.py index 86435cda2..a7a5e1941 100644 --- a/sumpy/distributed.py +++ b/sumpy/distributed.py @@ -33,13 +33,13 @@ if TYPE_CHECKING: 
- from sumpy.array_context import PyOpenCLArrayContext + from arraycontext import ArrayContext class DistributedSumpyExpansionWrangler( DistributedExpansionWranglerMixin, SumpyExpansionWrangler): def __init__( - self, actx: PyOpenCLArrayContext, + self, actx: ArrayContext, comm, tree_indep, local_traversal, global_traversal, dtype, fmm_level_to_order, communicate_mpoles_via_allreduce=False, **kwargs): @@ -53,7 +53,7 @@ def __init__( self.communicate_mpoles_via_allreduce = communicate_mpoles_via_allreduce def distribute_source_weights(self, - actx: PyOpenCLArrayContext, src_weight_vecs, src_idx_all_ranks): + actx: ArrayContext, src_weight_vecs, src_idx_all_ranks): src_weight_vecs_host = [ actx.to_numpy(src_weight) for src_weight in src_weight_vecs ] @@ -68,7 +68,7 @@ def distribute_source_weights(self, return local_src_weight_vecs_device def gather_potential_results(self, - actx: PyOpenCLArrayContext, potentials, tgt_idx_all_ranks): + actx: ArrayContext, potentials, tgt_idx_all_ranks): potentials_host_vec = [ actx.to_numpy(potentials_dev) for potentials_dev in potentials ] @@ -109,7 +109,7 @@ def reorder(x): return None def communicate_mpoles(self, - actx: PyOpenCLArrayContext, mpole_exps, return_stats=False): + actx: ArrayContext, mpole_exps, return_stats=False): mpole_exps_host = actx.to_numpy(mpole_exps) stats = super().communicate_mpoles(actx, mpole_exps_host, return_stats) mpole_exps[:] = mpole_exps_host diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 2f9df0553..34b888deb 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -25,6 +25,7 @@ import logging from abc import ABC, abstractmethod +from typing import TYPE_CHECKING import numpy as np from typing_extensions import override @@ -34,10 +35,14 @@ from pytools import memoize_method import sumpy.symbolic as sym -from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.array_context import make_loopy_program from sumpy.tools import KernelCacheMixin, to_complex_dtype +if TYPE_CHECKING: + 
from arraycontext import ArrayContext + + logger = logging.getLogger(__name__) @@ -267,7 +272,7 @@ def get_optimized_kernel(self): return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -511,7 +516,7 @@ def get_optimized_kernel(self, result_dtype): return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -624,7 +629,7 @@ def get_optimized_kernel(self, result_dtype): return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg src_rscale: :arg translation_classes_level_start: @@ -729,7 +734,7 @@ def get_optimized_kernel(self, result_dtype): knl = optimization(knl) return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg src_expansions :arg preprocessed_src_expansions @@ -830,7 +835,7 @@ def get_optimized_kernel(self, result_dtype): knl = lp.add_inames_for_unused_hw_axes(knl) return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg tgt_expansions :arg tgt_expansions_before_postprocessing @@ -943,7 +948,7 @@ def get_kernel(self): return loopy_knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: @@ -1050,7 +1055,7 @@ def get_kernel(self): return loopy_knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg src_expansions: :arg src_box_starts: diff --git a/sumpy/e2p.py b/sumpy/e2p.py index e0106b296..400d7b44c 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -24,6 +24,7 @@ """ from abc import ABC, abstractmethod +from typing import TYPE_CHECKING import 
numpy as np @@ -31,10 +32,14 @@ import pytools.obj_array as obj_array from loopy.version import MOST_RECENT_LANGUAGE_VERSION # noqa: F401 -from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.array_context import make_loopy_program from sumpy.tools import KernelCacheMixin, gather_loopy_arguments +if TYPE_CHECKING: + from arraycontext import ArrayContext + + __doc__ = """ Expansion-to-particle @@ -203,7 +208,7 @@ def get_optimized_kernel(self): return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg expansions: :arg target_boxes: @@ -331,7 +336,7 @@ def get_optimized_kernel(self): return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): centers = kwargs.pop("centers") # "1" may be passed for rscale, which won't have its type # meaningfully inferred. Make the type of rscale explicit. diff --git a/sumpy/fmm.py b/sumpy/fmm.py index 79bfa295f..ecb504e5e 100644 --- a/sumpy/fmm.py +++ b/sumpy/fmm.py @@ -77,9 +77,8 @@ from numpy.typing import DTypeLike import pyopencl - from arraycontext import Array + from arraycontext import Array, ArrayContext - from sumpy.array_context import PyOpenCLArrayContext from sumpy.expansion.local import LocalExpansionBase from sumpy.expansion.multipole import MultipoleExpansionBase @@ -114,7 +113,7 @@ class SumpyTreeIndependentDataForWrangler(TreeIndependentDataForWrangler): strength_usage: Sequence[int] | None def __init__(self, - array_context: PyOpenCLArrayContext, + array_context: ArrayContext, multipole_expansion_factory: MultipoleExpansionFromOrderFactory, local_expansion_factory: LocalExpansionFromOrderFactory, target_kernels: Sequence[Kernel], @@ -134,7 +133,7 @@ def __init__(self, """ super().__init__() - self._setup_actx: PyOpenCLArrayContext = array_context + self._setup_actx: ArrayContext = array_context self.multipole_expansion_factory = 
multipole_expansion_factory self.local_expansion_factory = local_expansion_factory @@ -422,7 +421,7 @@ def order_to_size(order: int): return build_csr_level_starts(self.level_orders, order_to_size, level_starts=self.m2l_translation_class_level_start_box_nrs()) - def multipole_expansion_zeros(self, actx: PyOpenCLArrayContext) -> Array: + def multipole_expansion_zeros(self, actx: ArrayContext) -> Array: """Return an expansions array (which must support addition) capable of holding one multipole or local expansion for every box in the tree. @@ -441,7 +440,7 @@ def local_expansion_zeros(self, actx) -> Array: dtype=self.dtype) def m2l_translation_classes_dependent_data_zeros( - self, actx: PyOpenCLArrayContext): + self, actx: ArrayContext): data_level_starts = ( self.m2l_translation_classes_dependent_data_level_starts()) level_start_box_nrs = ( @@ -497,7 +496,7 @@ def order_to_size(order): level_starts=self.tree_level_start_box_nrs) def m2l_preproc_mpole_expansion_zeros( - self, actx: PyOpenCLArrayContext, template_ary): + self, actx: ArrayContext, template_ary): level_starts = self.m2l_preproc_mpole_expansions_level_starts() result = [] @@ -522,7 +521,7 @@ def m2l_preproc_mpole_expansions_view(self, mpole_exps, level): m2l_work_array_level_starts = m2l_preproc_mpole_expansions_level_starts def output_zeros(self, - actx: PyOpenCLArrayContext + actx: ArrayContext ) -> obj_array.ObjectArray1D[Array]: """Return a potentials array (which must support addition) capable of holding a potential value for each target in the tree. 
Note that @@ -587,7 +586,7 @@ def box_target_list_kwargs(self): # }}} - def run_opencl_fft(self, actx: PyOpenCLArrayContext, + def run_opencl_fft(self, actx: ArrayContext, input_vec, inverse, wait_for): app = self.tree_indep.opencl_fft_app(input_vec.shape, input_vec.dtype, inverse) @@ -601,7 +600,7 @@ def run_opencl_fft(self, actx: PyOpenCLArrayContext, return result def form_multipoles(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, level_start_source_box_nrs, source_boxes, src_weight_vecs): mpoles = self.multipole_expansion_zeros(actx) @@ -635,7 +634,7 @@ def form_multipoles(self, return mpoles def coarsen_multipoles(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, level_start_source_parent_box_nrs, source_parent_boxes, mpoles): @@ -689,7 +688,7 @@ def coarsen_multipoles(self, return mpoles def eval_direct(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, target_boxes, source_box_starts, source_box_lists, src_weight_vecs): pot = self.output_zeros(actx) @@ -791,7 +790,7 @@ def _add_m2l_precompute_kwargs(self, kwargs_for_m2l, self.translation_classes_data.from_sep_siblings_translation_classes def multipole_to_local(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, level_start_target_box_nrs, target_boxes, src_box_starts, src_box_lists, mpole_exps): @@ -915,7 +914,7 @@ def multipole_to_local(self, return local_exps def eval_multipoles(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, target_boxes_by_source_level, source_boxes_by_level, mpole_exps): pot = self.output_zeros(actx) @@ -956,7 +955,7 @@ def eval_multipoles(self, return pot def form_locals(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, starts, lists, src_weight_vecs): local_exps = self.local_expansion_zeros(actx) @@ -997,7 +996,7 @@ def form_locals(self, return local_exps def refine_locals(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, 
level_start_target_or_target_parent_box_nrs, target_or_target_parent_boxes, local_exps): @@ -1040,7 +1039,7 @@ def refine_locals(self, return local_exps def eval_locals(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, level_start_target_box_nrs, target_boxes, local_exps): pot = self.output_zeros(actx) level_start_target_box_nrs = actx.to_numpy(level_start_target_box_nrs) @@ -1077,7 +1076,7 @@ def eval_locals(self, return pot - def finalize_potentials(self, actx: PyOpenCLArrayContext, potentials): + def finalize_potentials(self, actx: ArrayContext, potentials): return potentials # }}} diff --git a/sumpy/p2e.py b/sumpy/p2e.py index 10eeb8343..63d452464 100644 --- a/sumpy/p2e.py +++ b/sumpy/p2e.py @@ -24,15 +24,20 @@ """ import logging +from typing import TYPE_CHECKING import numpy as np import loopy as lp -from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.array_context import make_loopy_program from sumpy.tools import KernelCacheMixin, KernelComputation +if TYPE_CHECKING: + from arraycontext import ArrayContext + + logger = logging.getLogger(__name__) @@ -123,7 +128,7 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): enforce_variable_access_ordered="no_check") return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): from sumpy.tools import is_obj_array_like sources = kwargs.pop("sources") centers = kwargs.pop("centers") @@ -238,7 +243,7 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): knl = lp.split_iname(knl, "isrc_box", 16, outer_tag="g.0") return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg source_boxes: an array of integer indices into *box_source_starts* and *box_source_counts_nonchild*. 
@@ -369,7 +374,7 @@ def get_optimized_kernel(self, sources_is_obj_array, centers_is_obj_array): knl = lp.split_iname(knl, "itgt_box", 16, outer_tag="g.0") return knl - def __call__(self, actx: PyOpenCLArrayContext, **kwargs): + def __call__(self, actx: ArrayContext, **kwargs): """ :arg target_boxes: array of integer indices into *source_box_starts* and *centers*. diff --git a/sumpy/p2p.py b/sumpy/p2p.py index acd3a6f58..607fe411b 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -34,16 +34,16 @@ import loopy as lp import pytools.obj_array as obj_array +from arraycontext import PyOpenCLArrayContext -from sumpy.array_context import PyOpenCLArrayContext, make_loopy_program +from sumpy.array_context import make_loopy_program from sumpy.tools import KernelCacheMixin, KernelComputation, is_obj_array_like if TYPE_CHECKING: from collections.abc import Sequence - import pyopencl as cl - from arraycontext import Array + from arraycontext import Array, ArrayContext from pytools.obj_array import ObjectArray1D @@ -266,12 +266,12 @@ def get_kernel(self): return loopy_knl def __call__(self, - actx: PyOpenCLArrayContext, - targets: ObjectArray1D[Array] | Array, - sources: ObjectArray1D[Array] | Array, - strength: Sequence[Array], - **kwargs: Any, - ) -> tuple[cl.Event, Sequence[Array]]: + actx: ArrayContext, + targets: ObjectArray1D[Array] | Array, + sources: ObjectArray1D[Array] | Array, + strength: Sequence[Array], + **kwargs: Any, + ) -> ObjectArray1D[Array]: knl = self.get_cached_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) @@ -340,7 +340,7 @@ def get_kernel(self): return loopy_knl def __call__(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, targets: ObjectArray1D[Array] | Array, sources: ObjectArray1D[Array] | Array, **kwargs: Any, @@ -443,7 +443,7 @@ def get_optimized_kernel(self, targets_is_obj_array, sources_is_obj_array): return knl def __call__(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, 
targets: ObjectArray1D[Array] | Array, sources: ObjectArray1D[Array] | Array, *, @@ -803,7 +803,7 @@ def get_optimized_kernel(self, *, return knl def __call__(self, - actx: PyOpenCLArrayContext, + actx: ArrayContext, targets: ObjectArray1D[Array] | Array, sources: ObjectArray1D[Array] | Array, *, @@ -823,6 +823,7 @@ def __call__(self, source_dtype = None strength_dtype = None + assert isinstance(actx, PyOpenCLArrayContext) knl = self.get_cached_kernel( max_nsources_in_one_box=max_nsources_in_one_box, max_ntargets_in_one_box=max_ntargets_in_one_box, diff --git a/sumpy/qbx.py b/sumpy/qbx.py index ad7088451..926367a9d 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -39,12 +39,14 @@ from pytools import memoize_method import sumpy.symbolic as sym -from sumpy.array_context import PyOpenCLArrayContext, is_cl_cpu, make_loopy_program +from sumpy.array_context import is_cl_cpu, make_loopy_program from sumpy.tools import KernelCacheMixin, KernelComputation, is_obj_array_like if TYPE_CHECKING: + from arraycontext import ArrayContext + from sumpy.expansion.local import ( LineTaylorLocalExpansion, LocalExpansionBase, @@ -309,7 +311,7 @@ def get_kernel(self): return loopy_knl - def __call__(self, actx: PyOpenCLArrayContext, + def __call__(self, actx: ArrayContext, targets, sources, centers, strengths, expansion_radii, **kwargs): """ @@ -394,7 +396,7 @@ def get_kernel(self): return loopy_knl - def __call__(self, actx: PyOpenCLArrayContext, + def __call__(self, actx: ArrayContext, targets, sources, centers, expansion_radii, **kwargs): knl = self.get_cached_kernel( is_cpu=is_cl_cpu(actx), @@ -501,7 +503,7 @@ def get_optimized_kernel(self, loopy_knl = self._allow_redundant_execution_of_knl_scaling(loopy_knl) return loopy_knl - def __call__(self, actx: PyOpenCLArrayContext, + def __call__(self, actx: ArrayContext, targets, sources, centers, expansion_radii, tgtindices, srcindices, **kwargs): """Evaluate a subset of the QBX matrix interactions. 
diff --git a/sumpy/tools.py b/sumpy/tools.py index 8cae66789..25dc4bdc4 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -53,6 +53,7 @@ from optype.numpy import Array2D import pyopencl + from arraycontext import ArrayContext from pymbolic.primitives import Variable from pymbolic.typing import Expression @@ -972,13 +973,14 @@ def _get_fft_backend(queue: pyopencl.CommandQueue) -> FFTBackend: def get_opencl_fft_app( - actx: PyOpenCLArrayContext, + actx: ArrayContext, shape: tuple[int, ...], dtype: numpy.dtype[Any], inverse: bool) -> Any: """Setup an object for out-of-place FFT on with given shape and dtype on given queue. """ + assert isinstance(actx, PyOpenCLArrayContext) assert dtype.type in (np.float32, np.float64, np.complex64, np.complex128) @@ -1001,7 +1003,7 @@ def get_opencl_fft_app( def run_opencl_fft( - actx: PyOpenCLArrayContext, + actx: ArrayContext, fft_app: tuple[Any, FFTBackend], input_vec: Any, inverse: bool = False, @@ -1012,6 +1014,8 @@ def run_opencl_fft( vector. Only supports in-order queues. 
""" + assert isinstance(actx, PyOpenCLArrayContext) + app, backend = fft_app if backend == FFTBackend.loopy: diff --git a/sumpy/toys.py b/sumpy/toys.py index 1eb563608..c249ca1b1 100644 --- a/sumpy/toys.py +++ b/sumpy/toys.py @@ -43,7 +43,8 @@ if TYPE_CHECKING: from collections.abc import Mapping, Sequence - from sumpy.array_context import PyOpenCLArrayContext + from arraycontext import ArrayContext + from sumpy.expansion import ( ExpansionFactoryBase, LocalExpansionFactory, @@ -338,7 +339,7 @@ def _e2p(actx, psource, targets, e2p): return actx.to_numpy(pot) -def _e2e(actx: PyOpenCLArrayContext, +def _e2e(actx: ArrayContext, psource, to_center, to_rscale, to_order: int, e2e, expn_class, expn_kwargs, extra_kernel_kwargs): toy_ctx = psource.toy_ctx @@ -386,7 +387,7 @@ def _e2e(actx: PyOpenCLArrayContext, derived_from=psource, **expn_kwargs) -def _m2l(actx: PyOpenCLArrayContext, +def _m2l(actx: ArrayContext, psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs, translation_classes_kwargs): toy_ctx = psource.toy_ctx @@ -517,7 +518,7 @@ class PotentialSource: def __init__(self, toy_ctx: ToyContext): self.toy_ctx = toy_ctx - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: """ :param targets: An array of shape ``(dim, ntargets)``. :returns: an array of shape ``(ntargets,)``. 
@@ -567,7 +568,7 @@ def __init__(self, toy_ctx: ToyContext, value): super().__init__(toy_ctx) self.value = np.array(value) - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: pot = np.empty(targets.shape[-1], dtype=self.value.dtype) pot.fill(self.value) return pot @@ -588,7 +589,8 @@ def __init__(self, self.center = np.asarray(center) self.radius = radius - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + @override + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: dist_vec = targets - self.center[:, np.newaxis] return (np.sum(dist_vec**2, axis=0) < self.radius**2).astype(np.float64) @@ -607,7 +609,7 @@ def __init__(self, toy_ctx: ToyContext, center: np.ndarray, self.axis = axis self.side = side - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: return ( (self.side*(targets[self.axis] - self.center[self.axis])) >= 0 ).astype(np.float64) @@ -633,7 +635,8 @@ def __init__(self, self.weights = weights self._center = center - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + @override + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: potential, = self.toy_ctx.get_p2p()( actx, actx.from_numpy(targets), @@ -692,7 +695,7 @@ class MultipoleExpansion(ExpansionPotentialSource): Inherits from :class:`ExpansionPotentialSource`. """ - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: return _e2p(actx, self, targets, self.toy_ctx.get_m2p(self.order)) @@ -701,7 +704,7 @@ class LocalExpansion(ExpansionPotentialSource): Inherits from :class:`ExpansionPotentialSource`. 
""" - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: return _e2p(actx, self, targets, self.toy_ctx.get_l2p(self.order)) @@ -736,7 +739,7 @@ class Sum(PotentialExpressionNode): """ @override - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: result = np.zeros(targets.shape[1]) for psource in self.psources: result = result + psource.eval(actx, targets) @@ -750,7 +753,7 @@ class Product(PotentialExpressionNode): """ @override - def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: + def eval(self, actx: ArrayContext, targets: np.ndarray) -> np.ndarray: result = np.ones(targets.shape[1]) for psource in self.psources: result = result * psource.eval(actx, targets) @@ -760,7 +763,7 @@ def eval(self, actx: PyOpenCLArrayContext, targets: np.ndarray) -> np.ndarray: def multipole_expand( - actx: PyOpenCLArrayContext, + actx: ArrayContext, psource: PotentialSource, center: np.ndarray, *, order: int | None = None, @@ -787,7 +790,7 @@ def multipole_expand( def local_expand( - actx: PyOpenCLArrayContext, + actx: ArrayContext, psource: PotentialSource, center: np.ndarray, *, order: int | None = None, @@ -841,7 +844,7 @@ def local_expand( def logplot( - actx: PyOpenCLArrayContext, + actx: ArrayContext, fp: FieldPlotter, psource: PotentialSource, **kwargs) -> None: fp.show_scalar_in_matplotlib( @@ -898,7 +901,7 @@ def combine_halfspace_and_outer( psource_outer, radius, center) -def l_inf(actx: PyOpenCLArrayContext, psource: PotentialSource, radius: float, +def l_inf(actx: ArrayContext, psource: PotentialSource, radius: float, center: np.ndarray | None = None, npoints: int = 100, debug: bool = False) -> np.number: if center is None: From 22b9b8f9adb9b58c5697be39be85a275f0e60fcd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 10 Dec 2025 
15:39:05 -0600 Subject: [PATCH 59/59] Fix some type annotations in P2P --- sumpy/p2p.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumpy/p2p.py b/sumpy/p2p.py index 607fe411b..e1ef9078d 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -344,7 +344,7 @@ def __call__(self, targets: ObjectArray1D[Array] | Array, sources: ObjectArray1D[Array] | Array, **kwargs: Any, - ) -> Sequence[Array]: + ) -> ObjectArray1D[Array]: knl = self.get_cached_kernel( targets_is_obj_array=is_obj_array_like(targets), sources_is_obj_array=is_obj_array_like(sources)) @@ -450,7 +450,7 @@ def __call__(self, tgtindices: Array, srcindices: Array, **kwargs: Any, - ) -> tuple[cl.Event, Sequence[Array]]: + ) -> ObjectArray1D[Array]: """Evaluate a subset of the P2P matrix interactions. :arg targets: target point coordinates, which can be an object @@ -810,7 +810,7 @@ def __call__(self, max_nsources_in_one_box: int, max_ntargets_in_one_box: int, **kwargs: Any, - ) -> tuple[cl.Event, Sequence[Array]]: + ) -> ObjectArray1D[Array]: from sumpy.array_context import is_cl_cpu is_gpu = not is_cl_cpu(actx)