diff --git a/.test-conda-env-py3.yml b/.test-conda-env-py3.yml index 2264e9260..9f8c38d0c 100644 --- a/.test-conda-env-py3.yml +++ b/.test-conda-env-py3.yml @@ -12,12 +12,12 @@ dependencies: - islpy - pyopencl - python=3 -- python-symengine=0.6.0 +- python-symengine=0.6.1 - pyfmmlib - pip - pip: - git+https://github.com/inducer/pytools - git+https://gitlab.tiker.net/inducer/boxtree - - git+https://github.com/inducer/pymbolic + - git+https://gitlab.tiker.net/inducer/pymbolic - git+https://github.com/inducer/loopy diff --git a/benchmarks/bench_translations.py b/benchmarks/bench_translations.py index 8d6cfdd88..58388ceba 100644 --- a/benchmarks/bench_translations.py +++ b/benchmarks/bench_translations.py @@ -75,7 +75,7 @@ def track_m2l_op_count(self, param): for i, expr in enumerate(result): sac.assign_unique("coeff%d" % i, expr) sac.run_global_cse() - insns = to_loopy_insns(six.iteritems(sac.assignments)) + insns, _ = to_loopy_insns(six.iteritems(sac.assignments)) counter = pymbolic.mapper.flop_counter.CSEAwareFlopCounter() return sum([counter.rec(insn.expression)+1 for insn in insns]) diff --git a/requirements.txt b/requirements.txt index efe91f948..f522da7df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy sympy==1.1.1 git+https://github.com/inducer/pytools -git+https://github.com/inducer/pymbolic +git+https://gitlab.tiker.net/inducer/pymbolic git+https://github.com/inducer/islpy git+https://github.com/inducer/pyopencl git+https://gitlab.tiker.net/inducer/boxtree diff --git a/sumpy/assignment_collection.py b/sumpy/assignment_collection.py index dadf3fc95..f12c6d596 100644 --- a/sumpy/assignment_collection.py +++ b/sumpy/assignment_collection.py @@ -185,16 +185,20 @@ def run_global_cse(self, extra_exprs=[]): new_assignments, new_exprs = cse(assign_exprs + extra_exprs, symbols=self.symbol_generator) + xreplace_dict = {} + for name, value in new_assignments: + new_value = sym.make_cse(value.xreplace(xreplace_dict)) + xreplace_dict[name] = new_value + + for i in range(len(new_exprs)): + new_exprs[i] = new_exprs[i].xreplace(xreplace_dict) + new_assign_exprs = new_exprs[:len(assign_exprs)] new_extra_exprs = new_exprs[len(assign_exprs):] for name, new_expr in zip(assign_names, new_assign_exprs): self.assignments[name] = new_expr - for name, value in new_assignments: - assert isinstance(name, sym.Symbol) - self.add_assignment(name.name, value) - logger.info("common subexpression elimination: done after {dur:.2f} s" .format(dur=time.time() - start_time)) return new_extra_exprs diff --git a/sumpy/codegen.py b/sumpy/codegen.py index 085f9bd45..1d72ed868 100644 --- a/sumpy/codegen.py +++ b/sumpy/codegen.py @@ -32,7 +32,7 @@ import six import re -from pymbolic.mapper import IdentityMapper, WalkMapper, CSECachingMapperMixin +from pymbolic.mapper import CSECachingMapperMixin import pymbolic.primitives as prim from loopy.types import NumpyType @@ -40,6 +40,8 @@ from pytools import memoize_method from sumpy.symbolic import (SympyToPymbolicMapper as SympyToPymbolicMapperBase) +from sumpy.symbolic import Series, IdentityMapper, WalkMapper, SubstitutionMapper +from pymbolic.mapper.substitutor import make_subst_func import logging logger = logging.getLogger(__name__) @@ -64,13 +66,16 @@ class SympyToPymbolicMapper(SympyToPymbolicMapperBase): + def map_Sum(self, expr): # noqa + pymbolic_limits = [] + for name, low, high in expr.limits: + pymbolic_limits.append((self.rec(name), self.rec(low), self.rec(high))) + + return Series(self.rec(expr.function), pymbolic_limits) + def not_supported(self, expr): if isinstance(expr, int): return expr - elif getattr(expr, "is_Function", False): - func_name = SympyToPymbolicMapperBase.function_name(self, expr) - return prim.Variable(func_name)( - *tuple(self.rec(arg) for arg in expr.args)) else: return SympyToPymbolicMapperBase.not_supported(self, expr) @@ -123,8 +128,6 @@ def make_one_step_subst(assignments): # {{{ make substitution - from pymbolic import substitute - result = {} used_name_to_var = {} from pymbolic import evaluate @@ -133,7 +136,7 @@ def make_one_step_subst(assignments): for name in toposort: value = assignments[name] - value = substitute(value, result) + value = SubstitutionMapper(make_subst_func(result))(value) used_name_to_var.update( (used_name, prim.Variable(used_name)) for used_name in get_dependencies(value) @@ -177,9 +180,8 @@ def kill_trivial_assignments(assignments, retain_names=set()): unsubst_rej = make_one_step_subst(rejected_assignments) result = [] - from pymbolic import substitute for name, expr in approved_assignments: - r = substitute(expr, unsubst_rej) + r = SubstitutionMapper(make_subst_func(unsubst_rej))(expr) result.append((name, r)) logger.info( @@ -677,6 +679,35 @@ def map_variable(self, expr): map_common_subexpression_uncached = IdentityMapper.map_common_subexpression +# {{{ convert pymbolic "Series" class to loopy reduction + +class SeriesRewritter(CSECachingMapperMixin, IdentityMapper): + + def __init__(self): + self.additional_loop_domain = [] + + def map_series(self, expr): + function = self.rec(expr.function) + inames = [] + + for name, low, high in expr.limits: + low = self.rec(low) + high = self.rec(high) + name = str(name) + + inames.append(name) + self.additional_loop_domain.append( + # +1 is used for converting the closed bound in sympy to open bound + (name, low + 1, high + 1) + ) + + return lp.Reduction("sum", tuple(inames), function) + + map_common_subexpression_uncached = IdentityMapper.map_common_subexpression + +# }}} + + def to_loopy_insns(assignments, vector_names=set(), pymbolic_expr_maps=[], complex_dtype=None, retain_names=set()): logger.info("loopy instruction generation: start") @@ -703,6 +734,7 @@ def to_loopy_insns(assignments, vector_names=set(), pymbolic_expr_maps=[], # do the rest of the conversion bessel_sub = BesselSubstitutor(BesselGetter(btog.bessel_j_arg_to_top_order)) + sr = SeriesRewritter() vcr = VectorComponentRewriter(vector_names) pwr = PowerRewriter() ssg = SumSignGrouper() @@ -712,6 +744,7 @@ def to_loopy_insns(assignments, vector_names=set(), pymbolic_expr_maps=[], def convert_expr(name, expr): logger.debug("generate expression for: %s" % name) + expr = sr(expr) expr = bdr(expr) expr = bessel_sub(expr) expr = vcr(expr) @@ -735,6 +768,6 @@ def convert_expr(name, expr): for name, expr in assignments] logger.info("loopy instruction generation: done") - return result + return result, sr.additional_loop_domain # vim: fdm=marker diff --git a/sumpy/e2e.py b/sumpy/e2e.py index 12e80b5f5..6c8653abb 100644 --- a/sumpy/e2e.py +++ b/sumpy/e2e.py @@ -160,13 +160,18 @@ def get_kernel(self): # # (same for itgt_box, tgt_ibox) + insns, additional_domain = self.get_translation_loopy_insns() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -190,7 +195,7 @@ def get_kernel(self): {{dep=read_src_ibox}} """.format(coeffidx=i) for i in range(ncoeff_src)] + [ - ] + self.get_translation_loopy_insns() + [""" + ] + insns + [""" end """] + [""" @@ -276,11 +281,16 @@ def get_kernel(self): # # (same for itgt_box, tgt_ibox) + insns, additional_domain = self.get_translation_loopy_insns() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + loopy_insns = [ insn.copy( predicates=insn.predicates | frozenset(["is_src_box_valid"]), id=lp.UniqueName("compute_coeff")) - for insn in self.get_translation_loopy_insns()] + for insn in insns] from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( @@ -288,7 +298,7 @@ def get_kernel(self): "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -395,12 +405,17 @@ def get_kernel(self): # # (same for itgt_box, tgt_ibox) + insns, additional_domain = self.get_translation_loopy_insns() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + from sumpy.tools import gather_loopy_arguments loopy_knl = lp.make_kernel( [ "{[itgt_box]: 0<=itgt_box tgt_ibox = target_boxes[itgt_box] @@ -419,7 +434,7 @@ def get_kernel(self): {{id_prefix=read_expn,dep=read_src_ibox}} """.format(i=i) for i in range(ncoeffs)] + [ - ] + self.get_translation_loopy_insns() + [""" + ] + insns + [""" tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] = \ tgt_expansions[tgt_ibox - tgt_base_ibox, {i}] + coeff{i} \ diff --git a/sumpy/e2p.py b/sumpy/e2p.py index 7b0072ad5..ffe790538 100644 --- a/sumpy/e2p.py +++ b/sumpy/e2p.py @@ -102,7 +102,7 @@ def get_loopy_insns_and_result_names(self): sac.run_global_cse() from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns( + loopy_insns, additional_domain = to_loopy_insns( six.iteritems(sac.assignments), vector_names=set(["b"]), pymbolic_expr_maps=[self.expansion.get_code_transformer()], @@ -110,7 +110,7 @@ def get_loopy_insns_and_result_names(self): complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names + return loopy_insns, additional_domain, result_names def get_kernel_scaling_assignment(self): from sumpy.symbolic import SympyToPymbolicMapper @@ -135,13 +135,17 @@ class E2PFromSingleBox(E2PBase): def get_kernel(self): ncoeffs = len(self.expansion) - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) loopy_knl = lp.make_kernel( [ "{[itgt_box]: 0<=itgt_box src_ibox = source_boxes[isrc_box] @@ -133,7 +138,7 @@ def get_kernel(self): <> a[idim] = center[idim] - sources[idim, isrc] {dup=idim} <> strength = strengths[isrc] - """] + self.get_loopy_instructions() + [""" + """] + insns + [""" end """] + [""" tgt_expansions[src_ibox-tgt_base_ibox, {coeffidx}] = \ @@ -206,6 +211,11 @@ class P2EFromCSR(P2EBase): def get_kernel(self): ncoeffs = len(self.expansion) + insns, additional_domain = self.get_loopy_instructions() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + from sumpy.tools import gather_loopy_source_arguments arguments = ( [ @@ -231,7 +241,7 @@ def get_kernel(self): "{[isrc_box]: isrc_box_start<=isrc_box tgt_ibox = target_boxes[itgt_box] @@ -251,7 +261,7 @@ def get_kernel(self): {dup=idim} <> strength = strengths[isrc] - """] + self.get_loopy_instructions() + [""" + """] + insns + [""" end end """] + [""" diff --git a/sumpy/p2p.py b/sumpy/p2p.py index e3b457dd5..e54bb7393 100644 --- a/sumpy/p2p.py +++ b/sumpy/p2p.py @@ -99,7 +99,8 @@ def get_loopy_insns_and_result_names(self): sac.run_global_cse() from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns(six.iteritems(sac.assignments), + loopy_insns, additional_domain = to_loopy_insns( + six.iteritems(sac.assignments), vector_names=set(["d"]), pymbolic_expr_maps=[ knl.get_code_transformer() for knl in self.kernels], @@ -107,7 +108,7 @@ def get_loopy_insns_and_result_names(self): complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names + return loopy_insns, additional_domain, result_names def get_strength_or_not(self, isrc, kernel_idx): return var("strength").index((self.strength_usage[kernel_idx], isrc)) @@ -168,7 +169,12 @@ class P2P(P2PBase): default_name = "p2p_apply" def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -184,7 +190,7 @@ def get_kernel(self): 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ 0 <= idim < dim} - """], + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -238,7 +244,12 @@ def get_strength_or_not(self, isrc, kernel_idx): return 1 def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -251,7 +262,7 @@ def get_kernel(self): 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ 0 <= idim < dim} - """], + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> d[idim] = targets[idim, itgt] - sources[idim, isrc]"] @@ -304,7 +315,12 @@ def get_strength_or_not(self, isrc, kernel_idx): return 1 def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -317,7 +333,8 @@ def get_kernel(self): for i, dtype in enumerate(self.value_dtypes)]) loopy_knl = lp.make_kernel( - "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}", + ["{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}"] + + additional_domain, self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in @@ -412,7 +429,12 @@ class P2PFromCSR(P2PBase): default_name = "p2p_from_csr" def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -443,7 +465,7 @@ def get_kernel(self): itgt_start <= itgt < itgt_end and \ isrc_start <= isrc < isrc_end and \ 0 <= idim < dim}", - ], + ] + additional_domain, self.get_kernel_scaling_assignments() + [""" for itgt_box diff --git a/sumpy/qbx.py b/sumpy/qbx.py index 9708764c0..cc3e909cc 100644 --- a/sumpy/qbx.py +++ b/sumpy/qbx.py @@ -122,7 +122,7 @@ def get_loopy_insns_and_result_names(self): sac.run_global_cse() from sumpy.codegen import to_loopy_insns - loopy_insns = to_loopy_insns( + loopy_insns, additional_domain = to_loopy_insns( six.iteritems(sac.assignments), vector_names=set(["a", "b"]), pymbolic_expr_maps=[ @@ -131,7 +131,7 @@ def get_loopy_insns_and_result_names(self): complex_dtype=np.complex128 # FIXME ) - return loopy_insns, result_names + return loopy_insns, additional_domain, result_names def get_strength_or_not(self, isrc, kernel_idx): return var("strength_%d" % self.strength_usage[kernel_idx]).index(isrc) @@ -198,7 +198,12 @@ class LayerPotential(LayerPotentialBase): @memoize_method def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -214,7 +219,7 @@ def get_kernel(self): 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ 0 <= idim < dim} - """], + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -272,7 +277,12 @@ def get_strength_or_not(self, isrc, kernel_idx): @memoize_method def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -285,7 +295,7 @@ def get_kernel(self): 0 <= itgt < ntargets and \ 0 <= isrc < nsources and \ 0 <= idim < dim} - """], + """] + additional_domain, self.get_kernel_scaling_assignments() + ["for itgt, isrc"] + ["<> a[idim] = center[idim, itgt] - src[idim, isrc] {dup=idim}"] @@ -336,7 +346,12 @@ def get_strength_or_not(self, isrc, kernel_idx): @memoize_method def get_kernel(self): - loopy_insns, result_names = self.get_loopy_insns_and_result_names() + loopy_insns, additional_domain, result_names = \ + self.get_loopy_insns_and_result_names() + + from sumpy.tools import get_loopy_domain + additional_domain = get_loopy_domain(additional_domain) + kernel_exprs = self.get_kernel_exprs(result_names) arguments = ( self.get_default_src_tgt_arguments() @@ -350,7 +365,7 @@ def get_kernel(self): loopy_knl = lp.make_kernel([ "{[imat, idim]: 0 <= imat < nresult and 0 <= idim < dim}" - ], + ] + additional_domain, self.get_kernel_scaling_assignments() # NOTE: itgt, isrc need to always be defined in case a statement # in loopy_insns or kernel_exprs needs them (e.g. hardcoded in diff --git a/sumpy/symbolic.py b/sumpy/symbolic.py index 7a86958ae..f4f185fa8 100644 --- a/sumpy/symbolic.py +++ b/sumpy/symbolic.py @@ -27,7 +27,9 @@ from six.moves import range import numpy as np -from pymbolic.mapper import IdentityMapper as IdentityMapperBase +from loopy.symbolic import IdentityMapper as IdentityMapperBase +from loopy.symbolic import WalkMapper as WalkMapperBase +from pymbolic.mapper.substitutor import SubstitutionMapper as SubstitutionMapperBase import pymbolic.primitives as prim import logging @@ -80,13 +82,15 @@ def _find_symbolic_backend(): if USE_SYMENGINE: import symengine as sym - from pymbolic.interop.symengine import ( + from pymbolic.interop.symengine import ( # noqa: F401 PymbolicToSymEngineMapper as PymbolicToSympyMapper, - SymEngineToPymbolicMapper as SympyToPymbolicMapper) + SymEngineToPymbolicMapper as SympyToPymbolicMapper, + make_cse) else: import sympy as sym - from pymbolic.interop.sympy import ( - PymbolicToSympyMapper, SympyToPymbolicMapper) + from pymbolic.interop.sympy import ( # noqa: F401 + PymbolicToSympyMapper, SympyToPymbolicMapper, + make_cse) for _apifunc in SYMBOLIC_API: globals()[_apifunc] = getattr(sym, _apifunc) @@ -246,4 +250,46 @@ def map_subscript(self, expr): else: self.raise_conversion_error(expr) + +# {{{ Series + +class Series(prim.Expression): + def __init__(self, function, limits): + self.function = function + self.limits = limits + + mapper_method = "map_series" + + def __getinitargs__(self): + return self.function, self.limits + + +class IdentityMapper(IdentityMapperBase): + def map_series(self, expr): + new_limits = [] + for name, low, high in expr.limits: + new_limits.append((name, self.rec(low), self.rec(high))) + + return Series(self.rec(expr.function), new_limits) + + +class WalkMapper(WalkMapperBase): + def map_series(self, expr, *args, **kwargs): + if not self.visit(expr, *args, **kwargs): + return + + for name, low, high in expr.limits: + self.rec(low, *args, **kwargs) + self.rec(high, *args, **kwargs) + + self.rec(expr.function, *args, **kwargs) + + self.post_visit(expr, *args, **kwargs) + + +class SubstitutionMapper(SubstitutionMapperBase, IdentityMapper): + pass + +# }}} + # vim: fdm=marker diff --git a/sumpy/tools.py b/sumpy/tools.py index 4d1098429..725a8bf6b 100644 --- a/sumpy/tools.py +++ b/sumpy/tools.py @@ -717,4 +717,37 @@ def my_syntactic_subs(expr, subst_dict): return expr +def get_loopy_domain(loop_domains): + """Helper function to get the loopy domain to pass to `loopy.make_kernel`. + + :arg loop_domains: a list of domains. Each domain is a tuple. If the tuple has 3 + elements (name, low, high), it represents iname "name" has range [low, high). + If the tuple has 2 elements (name, duplicate_name), it represents iname + "name" has the same range as "duplicate_name", where the domain of + "duplicate_name" must be represented by 3-element tuples. + """ + domain_to_range = {} + + for domain in loop_domains: + if len(domain) == 3: + name, low, high = domain + assert name not in domain_to_range + domain_to_range[name] = (low, high) + + for idx, domain in enumerate(loop_domains): + if len(domain) == 2: + name, duplicate_name = domain + low, high = domain_to_range[duplicate_name] + loop_domains[idx] = (name, low, high) + + domains = [] + for idx, (name, low, high) in enumerate(loop_domains): + domains.append( + "{{ [{name}]: {low} <= {name} < {high} }}".format( + name=name, low=low, high=high + ) + ) + + return domains + # vim: fdm=marker diff --git a/test/test_codegen.py b/test/test_codegen.py index 7e3c25e0e..79130a2fc 100644 --- a/test/test_codegen.py +++ b/test/test_codegen.py @@ -25,6 +25,11 @@ import sys +import pyopencl as cl +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) +import pytest + import logging logger = logging.getLogger(__name__) @@ -105,6 +110,85 @@ def test_line_taylor_coeff_growth(): assert np.polyfit(np.log(indices), np.log(counts), deg=1)[0] < max_order +def test_sym_sum(ctx_getter): + from sumpy.symbolic import USE_SYMENGINE + if USE_SYMENGINE: + pytest.xfail("Symengine does not support symbolic sum yet") + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + + from sympy.abc import i, j + from sympy import Sum + sac.add_assignment("tmp", Sum(j, (j, 0, i))) + + import six + from sumpy.codegen import to_loopy_insns + insn, additional_loop_domain = to_loopy_insns( + six.iteritems(sac.assignments), + retain_names=["tmp"] + ) + + from sumpy.tools import get_loopy_domain + import loopy as lp + knl = lp.make_kernel( + ["{[i]: 0<=i<=5}"] + get_loopy_domain(additional_loop_domain), + ["for i"] + + insn + + [lp.Assignment("a[i]", "tmp")] + + ["end"], + lang_version=(2018, 2) + ) + + _, result = knl(queue) + result = result[0].get() + + import numpy as np + ref_sol = np.array([0, 1, 3, 6, 10, 15], dtype=np.int32) + + assert result.shape == (6,) + assert np.allclose(result, ref_sol) + + +def test_sym_nested_sum(ctx_getter): + from sumpy.symbolic import USE_SYMENGINE + if USE_SYMENGINE: + pytest.xfail("Symengine does not support symbolic sum yet") + + ctx = ctx_getter() + queue = cl.CommandQueue(ctx) + + from sumpy.assignment_collection import SymbolicAssignmentCollection + sac = SymbolicAssignmentCollection() + + from sympy.abc import i, j + from sympy import Sum + sac.add_assignment("tmp", Sum(i*j, (i, 0, 5), (j, 0, 3))) + + import six + from sumpy.codegen import to_loopy_insns + insn, additional_loop_domain = to_loopy_insns( + six.iteritems(sac.assignments), + retain_names=["tmp"] + ) + + from sumpy.tools import get_loopy_domain + import loopy as lp + knl = lp.make_kernel( + get_loopy_domain(additional_loop_domain), + insn + [lp.Assignment("a", "tmp")], + lang_version=(2018, 2) + ) + + _, result = knl(queue) + result = result[0].get() + + assert result == 90 + + # You can test individual routines by typing # $ python test_fmm.py 'test_sumpy_fmm(cl.create_some_context)' @@ -112,7 +196,6 @@ def test_line_taylor_coeff_growth(): if len(sys.argv) > 1: exec(sys.argv[1]) else: - from pytest import main - main([__file__]) + pytest.main([__file__]) # vim: fdm=marker