diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 241dd84..e24b666 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,43 +8,43 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.11', '3.12', 'pypy-3.11'] steps: - name: Setup Julia - uses: julia-actions/setup-julia@v1 + uses: julia-actions/setup-julia@v2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies + - name: Checkout + uses: actions/checkout@v4 + + - name: Build and install deps run: | - # install HPy from source if depending on a dev version - # git clone -b master --single-branch https://github.com/hpyproject/hpy - # git checkout 1234abcd - # cd hpy - # pip install . - pip install numpy cython pytest transonic pythran 'setuptools>=60.2' 'hpy>=0.9.0rc1' + pip install -e .[full] - - name: Checkout - uses: actions/checkout@v3 - with: - fetch-depth: 0 + - if: startsWith(matrix.python-version, 'pypy') != true + name: Build universal extension (only needed for CPython) + run: | + pip install -e . --config-settings="--global-option=--hpy-abi=universal" - - name: build + - name: Remove _piconumpy_hpy.py run: | - python setup.py develop - python setup.py --hpy-abi=universal develop + rm -f piconumpy/_piconumpy_hpy.py - name: Run tests run: | - pytest -s + pytest -v - name: Run bench run: | cd bench + make tmp_result_julia.txt + make bench_hpy + make bench_full + # rerun bench_hpy to get these results also at the end make bench_hpy - make diff --git a/.gitignore b/.gitignore index 9a709bb..7a37679 100644 --- a/.gitignore +++ b/.gitignore @@ -9,5 +9,10 @@ build **/tmp*.* **/tmp*.* +**/tmp/* -*_cython.c \ No newline at end of file +*_cython.c + +piconumpy/_piconumpy_hpy.py + +.venv* diff --git a/.mdformat.toml b/.mdformat.toml new file mode 100644 index 0000000..972483a --- /dev/null +++ b/.mdformat.toml @@ -0,0 +1,3 @@ +wrap = 89 +number = true +end_of_line = "lf" diff --git a/LICENSE b/LICENSE index 44b8153..17e7869 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2020, Pierre Augier +Copyright (c) 2020-2025, Pierre Augier Copyright (c) 2021, 2023, Oracle and/or it's affiliates All rights reserved. diff --git a/Makefile b/Makefile index f7f7c54..92877da 100644 --- a/Makefile +++ b/Makefile @@ -1,38 +1,57 @@ ifeq ($(PYTHON),) -PYTHON := python +PYTHON := python3 endif +IMPLEMENTATION := $(shell $(PYTHON) -c "import sys; print(sys.implementation.name)") + + all: - make develop_universal -ifeq ($(PYTHON),python) - make build_ext + make editable_universal +ifeq ($(IMPLEMENTATION),cpython) + make editable endif + +rm_hpy_py: + rm -f piconumpy/_piconumpy_hpy.py + +editable: + $(PYTHON) -m pip install -e . + make rm_hpy_py + +editable_universal: + $(PYTHON) -m pip install -e . 
--config-settings="--global-option=--hpy-abi=universal" + make rm_hpy_py + +editable_full: + $(PYTHON) -m pip install -e .[full] + make rm_hpy_py + + +# deprecated but let's keep them develop: $(PYTHON) setup.py develop + make rm_hpy_py develop_universal: $(PYTHON) setup.py --hpy-abi=universal develop - rm -f piconumpy/_piconumpy_hpy.py - -pip: - $(PYTHON) -m pip install -e .[dev] + make rm_hpy_py build_ext_universal: $(PYTHON) setup.py --hpy-abi=universal build_ext -if + make rm_hpy_py build_ext: $(PYTHON) setup.py build_ext -if + make rm_hpy_py -full: - $(PYTHON) -m pip install -e .[full] format: black -l 82 setup.py piconumpy/*.py clang-format-7 -i piconumpy/*cpython_capi.c -tests: +tests: rm_hpy_py $(PYTHON) -m pytest piconumpy -s clean: @@ -40,4 +59,21 @@ clean: rm -rf build dist piconumpy.egg-info black: - black -l 82 . \ No newline at end of file + black -l 82 . + + +install_pypy: + uv python install pypy + +install_graalpy: + uv python install graalpy + +create_venv_cpy: + $(PYTHON) -m venv .venv_cpy --upgrade-deps + +create_venv_pypy: + $(shell uv python find pypy) -m venv .venv_pypy --upgrade-deps + +create_venv_graalpy: + # cannot use --upgrade-deps because pip is patched for GraalPy + $(shell uv python find graalpy) -m venv .venv_graalpy diff --git a/README.md b/README.md index a0bad3d..f473ec5 100644 --- a/README.md +++ b/README.md @@ -5,81 +5,64 @@ **An experiment about Numpy and HPy** The C API of CPython is one of the cause of the success of Python in scientific -computing. In particular, Numpy (and all the Python scientific stack) is built -on top of this API. However, some characteristics of this API start to be an -issue for the future of scientific Python (see [1], [2], [HPy]). +computing. In particular, Numpy (and all the Python scientific stack) is built on top of +this API. However, some characteristics of this API start to be an issue for the future +of scientific Python (see [1], [2], [HPy]). -[1]: https://faster-cpython.readthedocs.io/ -[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html -[HPy]: https://github.com/hpyproject/hpy - -[HPy] is a very ambitious and promissing project to design a new and better C -API for interacting with Python interpreters. It should allow people to write -Python extensions efficient on different interpreters (CPython, PyPy, Jython, -IronPython, GraalPython, RustPython, etc.). +[HPy] is a very ambitious and promising project to design a new and better C API for +interacting with Python interpreters. It should allow people to write Python extensions +efficient on different interpreters (CPython, PyPy, Jython, IronPython, GraalPython, +RustPython, etc.). -PyPy would be especially useful for some scientific applications. For example -for Integration and ODEs -([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)), -for which there are a lot of callbacks of very small functions. This repository -contains [a tiny benchmark](bench/without_numpy) showing that as long as Numpy -is not used, PyPy is very efficient for such task. Unfortunately, as soon as -Numpy is used, PyPy becomes very slow! +PyPy would be especially useful for some scientific applications. For example for +Integration and ODEs +([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)), for which +there are a lot of callbacks of very small functions. This repository contains +[a tiny benchmark](bench/without_numpy) showing that as long as Numpy is not used, PyPy +is very efficient for such task. 
Unfortunately, as soon as Numpy is used, PyPy becomes +very slow! -[bench/without_numpy]: https://github.com/paugier/piconumpy/blob/master/bench/without_numpy/ +With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and callbacks +of small Python functions. -With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and -callbacks of small Python functions. +We start by a [simple but realistic benchmark](bench/bench_array1d.py) (the slow loops +only involve pure-Python and very simple Numpy). We then wrote a tiny ("pico") +implementation of a Numpy like object (just sufficient to run the benchmark). -We start by a [simple but realistic benchmark](bench/bench_array1d.py) (the -slow loops only involve pure-Python and very simple Numpy). We then wrote a -tiny ("pico") implementation of a Numpy like object (just sufficient to run the -benchmark). +The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy could +efficiently accelerate [our main benchmark](bench/bench_array1d.py). -The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy -could efficiently accelerate [our main benchmark](bench/bench_array1d.py). - -PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) -supporting: +PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) supporting: - Instantiation from a list of floats -- Elementwise multiplication and division by a float -- Elementwise addition (of 2 arrays) +- Element-wise multiplication and division by a float +- Element-wise addition (of 2 arrays) - Indexing - `len` -A good acceleration by PyPy of our example would be a great proof that the -scientific Python community has to invest time and energy on [HPy]. - -In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for -the benchmark and comparison. With Transonic-Pythran, we typically get a 50 -speedup compared to CPython (and ~400 versus PyPy, which is still very slow for -such codes using Numpy). +A good acceleration by PyPy of our example would be a great proof that the scientific +Python community has to invest time and energy on [HPy]. -[bench/bench_array1d.py]: https://github.com/paugier/piconumpy/blob/master/bench/bench_array1d.py +In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for the +benchmark and comparison. With Transonic-Pythran, we typically get a 50 speed-up compared +to CPython (and ~400 versus PyPy, which is still very slow for such codes using Numpy). ## Install and run the benchmarks -**Warning:** PicoNumpy now depends on HPy, which still has to be installed from -the [Git repository](https://github.com/hpyproject/hpy). For now, the -installation is a bit more complex that what is described here (more about this -[here](#more-precise-notes-on-how-to-install-and-run-the-benchmarks-with-PyPy)). - -`make` should install the package in editable mode. `cd bench; make` should run -the benchmarks. For the benchmarks, Julia is used for a good comparison point -so the command `julia` has to be available. +`pip install -e .[full]` should build and install the package in editable mode and all +dependencies necessary for testing, benchmarking and profiling. -For PyPy, the Makefiles are sensible to the environment variable `PYTHON`, so -you could do: +For the benchmarks, Julia is used for a good comparison point so the command `julia` has +to be available. 
Different benchmarks can be run with -```bash -export PYTHON=pypy3 -make +```sh cd bench -make +make clean +make bench_hpy +make bench_full ``` -The benchmark code can be profiled for the different implementations with the +The benchmark code can be profiled for the different piconumpy implementations with the commands (you need gprof2dot and graphviz): ```bash @@ -90,97 +73,192 @@ make profile METHOD="purepy" make profile METHOD="cython" ``` -### More precise notes on how to install and run the benchmarks with PyPy +### Notes on PyPy -Download and extract a nightly PyPy build -. Add to the `PATH` environment variable -the path of the directory containing the `pypy` executable (something like -`~/opt/pypy-c-jit-101190-b661dc329618-linux64/bin`). Then, you should be able -to run: +PyPy can be downloaded with UV or manually (for example from + for a nightly build). -```bash -pypy -m ensurepip -pypy -m pip install pip -U -pypy -m pip install numpy cython pytest transonic pythran +With UV, one can run + +```sh +uv python install pypy +``` + +and then get the path towards `pypy` executable with: + +```sh +uv python find pypy ``` -We need to install the correct version of HPy for the version of PyPy we are using: +which can give something like +`~/.local/share/uv/python/pypy-3.11.11-linux-x86_64-gnu/bin/pypy`. + +Then, you should be able to create a virtual environment, activate it and build-install +PicoNumpy with ```bash -pypy -c "import hpy.universal as u; print(u.get_version())" +cd ~/dev/piconumpy +$(uv python find pypy) -m venv .venv_pypy --upgrade-deps +. .venv_pypy/bin/activate +pip install -e .[full] ``` -gives `('0.0.2rc2.dev12+gc9660c2', 'c9660c2')`. +and run the benchmarks with: ```bash -cd ~/Dev/hpy -# update to the correct commit -pypy setup.py develop +cd bench +make clean +make bench_hpy +make bench_full ``` -Now we can build-install PicoNumpy: +Note that one can check which HPy version is vendored with PyPy: ```bash -cd ~/Dev/piconumpy -pypy setup.py --hpy-abi=universal develop +python -c "import hpy.universal as u; print(u.get_version())" ``` -And run the benchmarks with: +### Notes on GraalPy + +GraalPy can be downloaded with UV with + +```sh +uv python install graalpy +``` + +Then, one can run + +```sh +cd ~/dev/piconumpy +# cannot use --upgrade-deps because pip is patched for GraalPy +$(uv python find graalpy) -m venv .venv_graalpy +. .venv_graalpy/bin/activate +# we don't try to run the full benchmarks using Pythran on GraalPy +pip install -e .[test,profile] +``` + +and run the benchmarks with: ```bash -export PYTHON="pypy" +cd bench make clean make bench_hpy -make ``` ## Few results -As of today (6 July 2021), HPy is not yet ready for high performance, but at -least (with HPy 0.0.2) it runs ! 
- -### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz) +### Full benchmarks - With CPython ``` -Julia : 1 * norm = 0.00196 s -PicoNumpy (CPython C-API) : 9.42 * norm -PicoNumpy (HPy CPy ABI) : 9.95 * norm -PicoNumpy (HPy Universal) : 10.4 * norm -Transonic-Pythran : 0.497 * norm -Numpy : 27.5 * norm -PicoNumpy (purepy) : 37.3 * norm -PicoNumpy (purepy_array) : 37.7 * norm -PicoNumpy (Cython) : 28.9 * norm +{'cache_tag': 'cpython-311', + 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia : 1 * norm = 0.0129 s +PicoNumpy (CPython C-API) : 6.55 * norm +PicoNumpy (HPy CPy ABI) : 7.46 * norm +PicoNumpy (HPy Universal) : 7.92 * norm +Transonic-Pythran : 0.581 * norm +Numpy : 27.1 * norm +PicoNumpy (purepy) : 18.8 * norm +PicoNumpy (purepy_array) : 31.7 * norm +PicoNumpy (Cython) : 23.3 * norm ``` - With PyPy3 ``` -Julia : 1 * norm = 0.00196 s -PicoNumpy (CPython C-API) : 34.1 * norm -PicoNumpy (HPy Universal) : 12.8 * norm -Transonic-Pythran : 0.539 * norm -Numpy : 232 * norm -PicoNumpy (purepy) : 4.39 * norm -PicoNumpy (purepy_array) : 6.33 * norm -PicoNumpy (Cython) : 274 * norm +{'cache_tag': 'pypy311', + 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia : 1 * norm = 0.0129 s +PicoNumpy (CPython C-API) : 35.5 * norm +PicoNumpy (HPy Universal) : 44.7 * norm +Transonic-Pythran : 0.609 * norm +Numpy : 168 * norm +PicoNumpy (purepy) : 2.98 * norm +PicoNumpy (purepy_array) : 8.7 * norm +PicoNumpy (Cython) : 288 * norm ``` -#### Simpler benchmarks (bench/bench_cpy_vs_hpy.py) +Discussion: PyPy with HPy universal is really too slow (44.7x slower than Julia, 6x slower than +CPython with its C-API and even a bit slower that PyPy with cpyext!). This is a big issue +for HPy! + +A reasonable target would be as fast as CPython with its C-API... 
+ +Profiling shows that the issue is related to slow element-wise operations as in the micro-benchmark + +```sh +cd microbench_low_level +make bench_element_wise +``` - With CPython +```sh +bench element_wise +hostname: meige7ltpa212 +{'cache_tag': 'cpython-311', + 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)} +piconumpy.purepy : 7.88e-06 s ( 21.9 * Julia) +numpy : 7.88e-06 s ( 21.9 * Julia) +piconumpy.hpy (universal) : 1.34e-06 s ( 3.7 * Julia) +piconumpy.cpython_capi : 6.12e-07 s ( 1.7 * Julia) ``` -CPython C-API: 1.92 seconds -HPy [Universal]: 2.08 seconds -HPy [CPy ABI]: 2.02 seconds + +- With PyPy3 + +```sh +bench element_wise +hostname: meige7ltpa212 +{'cache_tag': 'pypy311', + 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)} +piconumpy.purepy : 1.46e-06 s ( 4.1 * Julia) +numpy : 4.39e-05 s (121.9 * Julia) +piconumpy.hpy (universal) : 4.27e-06 s ( 11.9 * Julia) +piconumpy.cpython_capi : 1.84e-06 s ( 5.1 * Julia) +``` + +### Simpler benchmarks (bench/bench_cpy_vs_hpy.py) + +- With CPython + +``` +{'cache_tag': 'cpython-311', + 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia: 0.013 seconds +CPython C-API: 0.084 seconds ( 6.5 * Julia) +HPy [Universal]: 0.102 seconds ( 7.9 * Julia) +HPy [CPy ABI]: 0.096 seconds ( 7.4 * Julia) ``` - With PyPy3 ``` -CPython C-API: 5.75 seconds -HPy [Universal]: 2.11 seconds +{'cache_tag': 'pypy311', + 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia: 0.013 seconds +CPython C-API: 0.382 seconds (29.6 * Julia) +HPy [Universal]: 0.487 seconds (37.6 * Julia) +Python list: 0.037 seconds ( 2.9 * Julia) ``` + +- GraalPy + +``` +{'cache_tag': 'graalpy242-311', + 'version': sys.version_info(major=3, minor=11, micro=7, releaselevel='final', serial=0)} +hostname: meige7ltpa212 +Julia: 0.013 seconds +CPython C-API: 2.123 seconds (164.2 * Julia) +HPy [Universal]: 1.541 seconds (119.2 * Julia) +Python list: 0.542 seconds (41.9 * Julia) +``` + +[1]: https://faster-cpython.readthedocs.io/ +[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html +[hpy]: https://github.com/hpyproject/hpy diff --git a/bench/Makefile b/bench/Makefile index 7da6e64..eb4c4d4 100644 --- a/bench/Makefile +++ b/bench/Makefile @@ -7,7 +7,7 @@ ifeq ($(METHOD),) METHOD := cpython-c-api endif -all: tmp.py tmp_result_julia.txt +bench_full: rm_hpy_py tmp.py tmp_result_julia.txt $(PYTHON) tmp.py tmp.py: bench_array1d.py make_bench_piconumpy.py @@ -20,11 +20,14 @@ clean: tmp_result_julia.txt: julia bench.jl > tmp_result_julia.txt -profile: tmp.py +profile: rm_hpy_py tmp.py $(PYTHON) profile_piconumpy.py $(METHOD) # with gprof2dot and graphviz (command dot) gprof2dot -f pstats tmp.pstats | dot -Tpng -o tmp_$(METHOD).png eog tmp_$(METHOD).png -bench_hpy: +bench_hpy: rm_hpy_py $(PYTHON) bench_cpy_vs_hpy.py + +rm_hpy_py: + rm -f ../piconumpy/_piconumpy_hpy.py diff --git a/bench/bench.jl b/bench/bench.jl index 00cedff..bd98571 100644 --- a/bench/bench.jl +++ b/bench/bench.jl @@ -65,7 +65,7 @@ function bench(n_sleds, n_time) end -n_sleds = 10 +n_sleds = 100 n_time = 200 nb_runs = 200 diff --git a/bench/bench_array1d.py b/bench/bench_array1d.py index a73a635..ba4426f 100644 --- a/bench/bench_array1d.py +++ b/bench/bench_array1d.py @@ -1,9 +1,14 @@ +import sys + import numpy as np from numpy import array from math import pi, cos, sin -from transonic import jit 
+from transonic import jit, wait_for_all_extensions + +IS_CPY = sys.implementation.name == "cpython" +IS_PYPY = sys.implementation.name == "pypy" # begin code functions (don't remove this line) @@ -75,15 +80,15 @@ def bench(n_sleds, n_time): # end code functions (don't remove this line) +if IS_CPY or IS_PYPY: -bench_pythran = jit(bench) -# Numba does not support this code... -# bench_numba = jit(backend="numba")(bench) -from transonic import wait_for_all_extensions + bench_pythran = jit(bench) + # Numba does not support this code... + # bench_numba = jit(backend="numba")(bench) -# warmup (compilation of the Pythran extension) -bench_pythran(1, 1) -wait_for_all_extensions() + # warmup (compilation of the Pythran extension) + bench_pythran(1, 1) + wait_for_all_extensions() if __name__ == "__main__": diff --git a/bench/bench_cpy_vs_hpy.py b/bench/bench_cpy_vs_hpy.py index 1b36278..1bb35dd 100644 --- a/bench/bench_cpy_vs_hpy.py +++ b/bench/bench_cpy_vs_hpy.py @@ -1,8 +1,11 @@ -import sys -import time import random +import socket +import sys + from math import pi, cos, sin from pathlib import Path +from pprint import pprint +from time import perf_counter here = Path(__file__).absolute().parent @@ -14,7 +17,7 @@ def my_randn(mod, n): return result -IS_PYPY = hasattr(sys, "pypy_version_info") +IS_CPY = sys.implementation.name == "cpython" def runge_kutta_step(mod, f, x0, dt, t=None): @@ -75,14 +78,18 @@ def bench(mod, n_sleds, n_time): u_init = mod.zeros(n_sleds) for i in range(n_sleds): u_init[i] += 3.5 - start = time.time() - solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time) - end = time.time() - return end - start + times = [] + for _ in range(20): + start = perf_counter() + solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time) + times.append(perf_counter() - start) + + times.sort() + return times[len(times) // 2] N_SLEDS = 100 -N_TIME = 2000 +N_TIME = 200 def import_piconumpy_hpy_universal(): @@ -101,18 +108,48 @@ def main(): import piconumpy._piconumpy_cpython_capi as pnp_capi - t = bench(pnp_capi, N_SLEDS, N_TIME) - print(f"CPython C-API: {t:.2f} seconds") + pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) + print(f"hostname: {socket.gethostname()}") + + tmp_result_julia = Path("tmp_result_julia.txt") + if tmp_result_julia.exists(): + with open("tmp_result_julia.txt") as file: + norm = float(file.read()) + end = "" + print(f"Julia: {norm:.3f} seconds") + else: + norm = False + end = "\n" + + t_capi = bench(pnp_capi, N_SLEDS, N_TIME) + print(f"CPython C-API: {t_capi:.3f} seconds", end=end) + if norm: + print(f" ({t_capi/norm:4.1f} * Julia)") pnp_hpy_universal = import_piconumpy_hpy_universal() - t = bench(pnp_hpy_universal, N_SLEDS, N_TIME) - print(f"HPy [Universal]: {t:.2f} seconds") + t_hpy_univ = bench(pnp_hpy_universal, N_SLEDS, N_TIME) + print(f"HPy [Universal]: {t_hpy_univ:.3f} seconds", end=end) - if not IS_PYPY: + if norm: + print(f" ({t_hpy_univ/norm:4.1f} * Julia)") + + if IS_CPY: import piconumpy._piconumpy_hpy as pnp_hpy - t = bench(pnp_hpy, N_SLEDS, N_TIME) - print(f"HPy [CPy ABI]: {t:.2f} seconds") + t_hpy_cpy_abi = bench(pnp_hpy, N_SLEDS, N_TIME) + print(f"HPy [CPy ABI]: {t_hpy_cpy_abi:.3f} seconds", end=end) + + if norm: + print(f" ({t_hpy_cpy_abi/norm:4.1f} * Julia)") + + if not IS_CPY: + import piconumpy.purepy as pnp_with_list + + t_with_list = bench(pnp_with_list, N_SLEDS, N_TIME) + print(f"Python list: {t_with_list:.3f} seconds", end=end) + + if norm: + print(f" ({t_with_list/norm:4.1f} * Julia)") if __name__ 
== "__main__": diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py index 4fbf5c0..4f92bcc 100644 --- a/bench/make_bench_piconumpy.py +++ b/bench/make_bench_piconumpy.py @@ -9,8 +9,9 @@ def create_tmp_file(name_module): if name_module == "_piconumpy_hpy_universal": code_import = """ -from piconumpy import _piconumpy_hpy -array = _piconumpy_hpy.array +from piconumpy.util_hpy import import_ext +ext = import_ext() +array = ext.array """ else: code_import = f"from piconumpy.{name_module} import array" @@ -42,12 +43,19 @@ def create_tmp_file(name_module): code = ( """ +import socket import sys + +from math import pi, cos, sin +from pathlib import Path +from pprint import pprint + import numpy as np + from piconumpy import array -from math import pi, cos, sin -IS_PYPY = hasattr(sys, 'pypy_version_info') +IS_CPY = sys.implementation.name == "cpython" + """ + code_functions + """ @@ -61,12 +69,16 @@ def create_tmp_file(name_module): from tmp_purepy_array import bench as bench_piconumpy_purepy_array from tmp_cython import bench as bench_cython -if not IS_PYPY: +if IS_CPY: from tmp_hpy import bench as bench_hpy +pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")}) +print(f"hostname: {socket.gethostname()}") # get norm from Julia benchmark -with open("tmp_result_julia.txt") as file: - norm = float(file.read()) + +path_julia_result = Path("tmp_result_julia.txt") +assert path_julia_result.exists() +norm = float(path_julia_result.read_text()) max_length_name = len("piconumpy (CPython C-API)") + 2 @@ -74,12 +86,12 @@ def create_tmp_file(name_module): name = fmt_name.format("Julia") print(f"{name}: 1 * norm = {norm:4.3g} s") -n_sleds = 10 +n_sleds = 100 n_time = 200 g = locals() -def timeit(name_func, name): +def timeit(name_func, name, total_duration=2): return timeit_verbose( name_func + "(n_sleds, n_time)", globals=g, @@ -87,21 +99,28 @@ def timeit(name_func, name): print_time=False, norm=norm, max_length_name=max_length_name, + total_duration=total_duration, ) timeit("bench", name="PicoNumpy (CPython C-API)") -if not IS_PYPY: +if IS_CPY: timeit("bench_hpy", name="PicoNumpy (HPy CPy ABI)") timeit("bench_hpy_universal", name="PicoNumpy (HPy Universal)") timeit("bench_pythran", name="Transonic-Pythran") -timeit("bench_numpy", name="Numpy") +try: + timeit("bench_numpy", name="Numpy", total_duration=8) +except RuntimeError: + print("Skip bench_numpy because it's too slow") timeit( "bench_piconumpy_purepy", name="PicoNumpy (purepy)", ) timeit( "bench_piconumpy_purepy_array", name="PicoNumpy (purepy_array)", ) -timeit("bench_cython", name="PicoNumpy (Cython)") +try: + timeit("bench_cython", name="PicoNumpy (Cython)", total_duration=8) +except RuntimeError: + print("Skip bench_cython because it's too slow") """ ) diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile new file mode 100644 index 0000000..5f874dd --- /dev/null +++ b/bench/microbench_low_level/Makefile @@ -0,0 +1,48 @@ + +IMPLEMENTATION=$(shell python -c 'import sys; print(sys.implementation.cache_tag)') + +.PHONY : clean bench_sum_loop bench_sum_loop_index bench_cort bench_init_zeros bench_instantiate + +bench_sum_loop: NAME_BENCH=sum_loop +bench_sum_loop: tmp/sum_loop_julia.txt _bench + +bench_sum_loop_index: NAME_BENCH=sum_loop_index +bench_sum_loop_index: tmp/sum_loop_index_julia.txt _bench + +bench_cort: NAME_BENCH=cort +bench_cort: tmp/cort_julia.txt _bench + +bench_init_zeros: NAME_BENCH=init_zeros +bench_init_zeros: tmp/init_zeros_julia.txt _bench + 
+bench_board: NAME_BENCH=board +bench_board: tmp/board_julia.txt _bench + +bench_instantiate: NAME_BENCH=instantiate +bench_instantiate: tmp/instantiate_julia.txt _bench + +bench_element_wise: NAME_BENCH=element_wise +bench_element_wise: tmp/element_wise_julia.txt _bench + +_bench: + @echo bench $(NAME_BENCH) + @python -c "from socket import gethostname as f; print('hostname:', f())" + @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})" + @python bench.py list $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_list.txt + @python bench.py purepy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_purepy.txt + @python bench.py numpy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_numpy.txt + @python bench.py _piconumpy_hpy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_hpy.txt + @python bench.py _piconumpy_cpython_capi $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_cpy_api.txt + +tmp/%_julia.txt: julia/bench_%.jl + @mkdir -p tmp + @julia julia/bench_$*.jl > $@ + +clean: + rm -rf tmp + +produce_traces: tmp/sum_loop_julia.txt + @mkdir -p tmp + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_list.txt pypy bench.py list + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_piconumpy_list.txt pypy bench.py purepy + PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_piconumpy_hpy.txt pypy bench.py _piconumpy_hpy diff --git a/bench/microbench_low_level/README.md b/bench/microbench_low_level/README.md new file mode 100644 index 0000000..7965c76 --- /dev/null +++ b/bench/microbench_low_level/README.md @@ -0,0 +1,25 @@ +# Microbenchmarks low level Python code + +We measure the performance for functions containing low level Python code. + +- `sum_loop` (command `make bench`): `for value in arr` and summation + +- `sum_loop_index` (command `make bench_sum_loop_index`): + `for index in range(5000)` and summation + +- `init_zeros` (command `make bench_init_zeros`): set values to zeros + +- `cort` (command `make bench_cort`): normalized cosine similarity measure + between derivatives + +- `board` (command `make bench_board`): few indexing, simple float computations + with sin/cos and instantiation of a small array. + +- `instantiate` (command `make bench_instantiate`): dominated by the + instantiation/deletion of small arrays of 4 floats. + +- `element_wise` (command `make bench_element_wise`): dominated by the + instantiation/deletion of small arrays of 4 floats and calling element-wise + operations. + +The files result_*.txt contain few results. 
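Each microbenchmark feeds the same pure-Python kernel with arrays built by the different backends. A minimal interactive sketch of that idea follows (the real harness is `bench.py` below, which additionally warms up, takes the median of repeated runs, and normalizes by the Julia reference stored in `tmp/<name>_julia.txt`):

```python
from random import random

# two of the backends exercised by bench.py
from piconumpy.purepy import array as array_purepy                  # list-backed array
from piconumpy._piconumpy_cpython_capi import array as array_capi   # CPython C-API extension


def sum_loop(arr):
    # identical kernel whatever the backend: plain iteration and summation
    result = 0.0
    for value in arr:
        result += value
    return result


data = [random() for _ in range(10_000)]
print(sum_loop(array_purepy(data)))
print(sum_loop(array_capi(data)))
```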
diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py new file mode 100644 index 0000000..50a7d48 --- /dev/null +++ b/bench/microbench_low_level/bench.py @@ -0,0 +1,186 @@ +import sys +from time import perf_counter +from pathlib import Path +from random import random +from math import sqrt, pi, sin, cos + +try: + method = sys.argv[1] +except IndexError: + method = "purepy" + +try: + name_bench = sys.argv[2] +except IndexError: + name_bench = "sum_loop" + +try: + size = sys.argv[3] +except IndexError: + size = None + +if method == "_piconumpy_hpy": + from piconumpy.util_hpy import import_ext + + ext = import_ext() + array = ext.array +elif method == "list": + array = list + if name_bench == "element_wise": + sys.exit(0) + +elif method == "numpy": + + try: + import numpy as np + except ImportError: + print(f"{method:30s}: ImportError numpy") + sys.exit(0) + + array = np.array +else: + d = {} + exec(f"from piconumpy.{method} import array", d) + array = d["array"] + if "piconumpy" not in method: + method = f"piconumpy.{method}" + +if "_piconumpy_" in method: + method = method.replace("_piconumpy_", "piconumpy.") + +if method.endswith("hpy"): + method += " (universal)" + +tmp_result_julia = Path(f"tmp/{name_bench}_julia.txt") +if tmp_result_julia.exists(): + with open(tmp_result_julia) as file: + norm = float(file.read()) +else: + raise RuntimeError( + f"{tmp_result_julia} does not exist. First execute with `make`" + ) + + +def sum_loop(arr): + result = 0.0 + for value in arr: + result += value + return result + + +def sum_loop_index(arr): + result = 0.0 + for index in range(5000): + result += arr[index] + return result + + +def init_zeros(arr): + for index in range(len(arr)): + arr[index] = 0.0 + + +def _cort(s1, s2): + num = 0.0 + sum_square_x = 0.0 + sum_square_y = 0.0 + for t in range(len(s1) - 1): + slope_1 = s1[t + 1] - s1[t] + slope_2 = s2[t + 1] - s2[t] + num += slope_1 * slope_2 + sum_square_x += slope_1 * slope_1 + sum_square_y += slope_2 * slope_2 + return num / (sqrt(sum_square_x * sum_square_y)) + + +def cort(arr): + return _cort(arr, arr) + + +def board(X_0): + x0 = X_0[0] + y0 = X_0[1] + u0 = X_0[2] + v0 = X_0[3] + + g = 9.81 + b = 0.5 + a = 0.25 + c = 0.5 + p = (2 * pi) / 10.0 + q = (2 * pi) / 4.0 + + H_x = -a + b * p * sin(p * x0) * cos(q * y0) + H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0) + H_y = b * q * cos(p * x0) * sin(q * y0) + H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0) + H_xy = -b * q * p * sin(p * x0) * sin(q * y0) + + F = (g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2) / ( + 1 + H_x ** 2 + H_y ** 2 + ) + + dU = -F * H_x - c * u0 + dV = -F * H_y - c * v0 + + return array([u0, v0, dU, dV]) + + +def instantiate(arr): + x = arr[0] + result = array([x, 3 * x, 6 * x, 9 * x]) + result[0] = 2 * result[1] + return result + + +def element_wise(arr): + + dt = 0.1 + x0 = arr + + k1 = x0 * dt + k2 = (x0 + k1 / 2) * dt + k3 = (x0 + k2 / 2) * dt + k4 = (x0 + k3) * dt + # workaround for a pypy bug + # see https://foss.heptapod.net/pypy/pypy/-/issues/3509 + # x_new = x0 + (k1 + 2 * k2 + 2 * k3 + k4) / 6 + x_new = x0 + (k1 + k2 * 2 + k3 * 2 + k4) / 6 + return x_new + + +compute_from_arr = locals()[name_bench] + +if size is None: + if name_bench.startswith("sum_loop") or name_bench == "cort": + size = 10000 + else: + size = 4 + +print(f"{method:30s}:", end="", flush=True) + +# warming during ~ 1s +data_as_list = [random() for _ in range(size)] +arr = array(data_as_list) +t_start = perf_counter() +while perf_counter() - t_start < 1.0: + 
compute_from_arr(arr) + + +def median(sequence): + tmp = sorted(sequence) + return tmp[len(tmp) // 2] + + +# measure during ~ 4s +t0 = perf_counter() +times = [] +while perf_counter() - t0 < 4.0: + data_as_list = [random() for _ in range(size)] + arr = array(data_as_list) + t_start = perf_counter() + compute_from_arr(arr) + times.append(perf_counter() - t_start) + +time = median(times) +print(f" {time:.2e} s ({time / norm:5.1f} * Julia)") diff --git a/bench/microbench_low_level/julia/bench_board.jl b/bench/microbench_low_level/julia/bench_board.jl new file mode 100644 index 0000000..69d8b64 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_board.jl @@ -0,0 +1,44 @@ +using Statistics + +function board(X_0::Array) + + x0 = copy(X_0[1]) + y0 = copy(X_0[2]) + u0 = copy(X_0[3]) + v0 = copy(X_0[4]) + + g = 9.81 + a = 0.25 + b = 0.5 + c = 0.5 + p = (2*π)/10.0 + q = (2*π)/4.0 + + H_x = -a + b*p*sin(p*x0)*cos(q*y0) + H_xx = b*p^2 * cos(p*x0)*cos(q*y0) + H_y = b*q*cos(p*x0)*sin(q*y0) + H_yy = b*q^2 * cos(p*x0)*cos(q*y0) + H_xy = -b*q*p*sin(p*x0)*sin(q*y0) + + F = (g + H_xx*u0^2 + 2*H_xy*u0*v0 + H_yy*v0^2)/(1 + H_x^2 + H_y^2) + + dU = -F*H_x - c*u0 + dV = -F*H_y - c*v0 + + return [u0, v0, dU, dV] + +end + +compute_from_arr = board + +size = 4 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/julia/bench_cort.jl b/bench/microbench_low_level/julia/bench_cort.jl new file mode 100644 index 0000000..a816541 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_cort.jl @@ -0,0 +1,35 @@ +using Statistics + + +function cort(s1, s2) + num = 0.0 + sum_square_x = 0.0 + sum_square_y = 0.0 + for t in 1:length(s1)-1 + slope_1 = s1[t + 1] - s1[t] + slope_2 = s2[t + 1] - s2[t] + num += slope_1 * slope_2 + sum_square_x += slope_1 * slope_1 + sum_square_y += slope_2 * slope_2 + end + return num / (sqrt(sum_square_x * sum_square_y)) +end + +function use_cort(arr) + return cort(arr, arr) +end + + +compute_from_arr = use_cort + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/julia/bench_element_wise.jl b/bench/microbench_low_level/julia/bench_element_wise.jl new file mode 100644 index 0000000..c91a16f --- /dev/null +++ b/bench/microbench_low_level/julia/bench_element_wise.jl @@ -0,0 +1,30 @@ +using Statistics + +function element_wise(arr::Array) + + dt = 0.1 + x0 = arr + + k1 = x0 * dt + k2 = (x0 + k1 / 2) * dt + k3 = (x0 + k2 / 2) * dt + k4 = (x0 + k3) * dt + x_new = x0 + (k1 + 2 * k2 + 2 * k3 + k4) / 6 + + return x_new + +end + +compute_from_arr = element_wise + +size = 4 +nb_runs = 2000 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/julia/bench_init_zeros.jl b/bench/microbench_low_level/julia/bench_init_zeros.jl new file mode 100644 index 0000000..4ac2656 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_init_zeros.jl @@ -0,0 +1,21 @@ +using Statistics + +function init_zeros(arr) + for i in eachindex(arr) + arr[i] = 0.0 + end +end + +compute_from_arr = init_zeros + +size = 4 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff 
--git a/bench/microbench_low_level/julia/bench_instantiate.jl b/bench/microbench_low_level/julia/bench_instantiate.jl new file mode 100644 index 0000000..5116e07 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_instantiate.jl @@ -0,0 +1,22 @@ +using Statistics + +function instantiate(arr::Array) + x = arr[1] + result = [x, 3*x, 6*x, 9*x] + result[1] = 2 * result[2] + return result +end + +compute_from_arr = instantiate + +size = 4 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/julia/bench_sum_loop.jl b/bench/microbench_low_level/julia/bench_sum_loop.jl new file mode 100644 index 0000000..5c38b52 --- /dev/null +++ b/bench/microbench_low_level/julia/bench_sum_loop.jl @@ -0,0 +1,23 @@ +using Statistics + +function sum_loop(arr) + result = 0. + for i in eachindex(arr) + result += arr[i] + end + return result +end + +compute_from_arr = sum_loop + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/julia/bench_sum_loop_index.jl b/bench/microbench_low_level/julia/bench_sum_loop_index.jl new file mode 100644 index 0000000..b4c682c --- /dev/null +++ b/bench/microbench_low_level/julia/bench_sum_loop_index.jl @@ -0,0 +1,23 @@ +using Statistics + +function sum_loop_index(arr) + result = 0. + for i = 1:5000 + result += arr[i] + end + return result +end + +compute_from_arr = sum_loop_index + +size = 10000 +nb_runs = 200 + +times = zeros(nb_runs) + +for irun in 1:nb_runs + arr = rand(size) + times[irun] = @elapsed compute_from_arr(arr) +end + +println(median(times)) diff --git a/bench/microbench_low_level/result_board.md b/bench/microbench_low_level/result_board.md new file mode 100644 index 0000000..30b407b --- /dev/null +++ b/bench/microbench_low_level/result_board.md @@ -0,0 +1,77 @@ +# Microbenchmark board + +We measure the performance for this function: + +```python +def board(X_0): + x0 = X_0[0] + y0 = X_0[1] + u0 = X_0[2] + v0 = X_0[3] + + g = 9.81 + b = 0.5 + a = 0.25 + c = 0.5 + p = (2 * pi) / 10.0 + q = (2 * pi) / 4.0 + + H_x = -a + b * p * sin(p * x0) * cos(q * y0) + H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0) + H_y = b * q * cos(p * x0) * sin(q * y0) + H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0) + H_xy = -b * q * p * sin(p * x0) * sin(q * y0) + + F = (g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2) / ( + 1 + H_x ** 2 + H_y ** 2 + ) + + dU = -F * H_x - c * u0 + dV = -F * H_y - c * v0 + + return array([u0, v0, dU, dV]) +``` + +One can run the benchmarks with `make bench_board`. 
+ +With PyPy3.7, I get: + +``` +bench board +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 3.21e-07 s ( 0.9 * Julia) +piconumpy.purepy : 1.37e-05 s ( 36.9 * Julia) +numpy : 1.18e-04 s (316.6 * Julia) +piconumpy.hpy : 1.26e-05 s ( 33.8 * Julia) +piconumpy.cpython_capi : 5.52e-05 s (148.6 * Julia) +``` + +With CPython: + +``` +bench board +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 5.16e-06 s ( 13.9 * Julia) +piconumpy.purepy : 8.04e-06 s ( 21.6 * Julia) +numpy : 1.01e-05 s ( 27.1 * Julia) +piconumpy.hpy : 5.90e-06 s ( 15.9 * Julia) +piconumpy.cpython_capi : 5.56e-06 s ( 15.0 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench board +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.15e-05 s ( 30.9 * Julia) +piconumpy.purepy : 1.74e-05 s ( 46.8 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 4.91e-05 s (132.2 * Julia) +piconumpy.cpython_capi : 6.19e-05 s (166.7 * Julia) +``` diff --git a/bench/microbench_low_level/result_cort.md b/bench/microbench_low_level/result_cort.md new file mode 100644 index 0000000..b5578bf --- /dev/null +++ b/bench/microbench_low_level/result_cort.md @@ -0,0 +1,64 @@ +# Microbenchmark cort + +We measure the performance for this function: + +```python +def cort(arr): + return _cort(arr, arr) + +def _cort(s1, s2): + num = 0.0 + sum_square_x = 0.0 + sum_square_y = 0.0 + for t in range(len(s1) - 1): + slope_1 = s1[t + 1] - s1[t] + slope_2 = s2[t + 1] - s2[t] + num += slope_1 * slope_2 + sum_square_x += slope_1 * slope_1 + sum_square_y += slope_2 * slope_2 + return num / (sqrt(sum_square_x * sum_square_y)) +``` + +One can run the benchmarks with `make bench_cort`. 
+ +With PyPy3.7, I get: + +``` +bench cort +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 4.29e-05 s ( 1.8 * Julia) +piconumpy.purepy : 4.12e-05 s ( 1.7 * Julia) +numpy : 4.77e-02 s (1975.5 * Julia) +piconumpy.hpy : 1.46e-03 s ( 60.5 * Julia) +piconumpy.cpython_capi : 6.96e-03 s (288.5 * Julia) +``` + +With CPython: + +``` +bench cort +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 4.42e-03 s (183.4 * Julia) +piconumpy.purepy : 1.04e-02 s (430.0 * Julia) +numpy : 9.76e-03 s (404.4 * Julia) +piconumpy.hpy : 5.66e-03 s (234.7 * Julia) +piconumpy.cpython_capi : 4.77e-03 s (197.7 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench cort +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 2.44e-05 s ( 1.0 * Julia) +piconumpy.purepy : 3.13e-05 s ( 1.3 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 1.69e-04 s ( 7.0 * Julia) +piconumpy.cpython_capi : 3.55e-04 s ( 14.7 * Julia) +``` diff --git a/bench/microbench_low_level/result_init_zeros.md b/bench/microbench_low_level/result_init_zeros.md new file mode 100644 index 0000000..b88e4bd --- /dev/null +++ b/bench/microbench_low_level/result_init_zeros.md @@ -0,0 +1,53 @@ +# Microbenchmark sum_init_zeros + +We measure the performance for this function: + +```python +def init_zeros(arr): + for index in range(len(arr)): + arr[index] = 0.0 +``` + +One can run the benchmarks with `make bench_init_zeros`. + +With PyPy3.7, I get: + +``` +bench init_zeros +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 2.63e-05 s ( 5.4 * Julia) +piconumpy.purepy : 2.99e-05 s ( 6.1 * Julia) +numpy : 1.17e-02 s (2403.5 * Julia) +piconumpy.hpy : 4.58e-04 s ( 94.1 * Julia) +piconumpy.cpython_capi : 8.46e-04 s (173.6 * Julia) +``` + +With CPython: + +``` +bench init_zeros +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 5.34e-04 s (109.6 * Julia) +piconumpy.purepy : 2.03e-03 s (417.4 * Julia) +numpy : 1.17e-03 s (239.3 * Julia) +piconumpy.hpy : 7.51e-04 s (154.1 * Julia) +piconumpy.cpython_capi : 5.44e-04 s (111.5 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench init_zeros +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.37e-05 s ( 2.8 * Julia) +piconumpy.purepy : 1.93e-05 s ( 4.0 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 4.68e-05 s ( 9.6 * Julia) +piconumpy.cpython_capi : 1.74e-04 s ( 35.8 * Julia) +``` diff --git a/bench/microbench_low_level/result_instantiate.md b/bench/microbench_low_level/result_instantiate.md new file mode 100644 index 0000000..883cea1 --- /dev/null +++ b/bench/microbench_low_level/result_instantiate.md @@ -0,0 +1,55 @@ +# Microbenchmark instantiate + +We measure the performance for this function: + +```python +def instantiate(arr): + x = arr[0] + result = array([x, 3 * x, 6 * x, 9 * x]) + result[0] = 2 * result[1] + return result +``` + +One can run the benchmarks with `make bench_instantiate`. 
+ +With PyPy3.7, I get: + +``` +bench instantiate +hostname: meige8pcpa79 +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 1.13e-07 s ( 0.9 * Julia) +piconumpy.purepy : 8.50e-08 s ( 0.7 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 1.69e-06 s ( 13.1 * Julia) +piconumpy.cpython_capi : 1.53e-05 s (118.3 * Julia) +``` + +With CPython: + +``` +bench instantiate +hostname: meige8pcpa79 +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 1.19e-06 s ( 9.2 * Julia) +piconumpy.purepy : 2.59e-06 s ( 20.0 * Julia) +numpy : 3.63e-06 s ( 28.1 * Julia) +piconumpy.hpy : 1.84e-06 s ( 14.3 * Julia) +piconumpy.cpython_capi : 1.35e-06 s ( 10.5 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench instantiate +hostname: meige8pcpa79 +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 4.16e-06 s ( 32.3 * Julia) +piconumpy.purepy : 4.15e-06 s ( 32.2 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 7.32e-06 s ( 56.8 * Julia) +piconumpy.cpython_capi : 9.68e-06 s ( 75.0 * Julia) +``` diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md new file mode 100644 index 0000000..062840b --- /dev/null +++ b/bench/microbench_low_level/result_sum_loop.md @@ -0,0 +1,201 @@ +# Microbenchmark sum_loop + +We measure the performance for this function: + +```python +def sum_loop(arr): + result = 0.0 + for value in arr: + result += value + return result +``` + +One can run the benchmarks with `make bench_sum_loop`. + +With PyPy3.7, I get: + +``` +bench sum_loop +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 2.35e-05 s ( 1.8 * Julia) +piconumpy.purepy : 2.60e-05 s ( 2.0 * Julia) +numpy : 8.97e-03 s (677.0 * Julia) +piconumpy.hpy : 3.73e-04 s ( 28.2 * Julia) +piconumpy.cpython_capi : 1.75e-03 s (132.4 * Julia) +``` + +With CPython: + +``` +bench sum_loop +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 3.65e-04 s ( 27.5 * Julia) +piconumpy.purepy : 2.17e-03 s (164.1 * Julia) +numpy : 1.09e-03 s ( 82.2 * Julia) +piconumpy.hpy : 7.39e-04 s ( 55.8 * Julia) +piconumpy.cpython_capi : 5.07e-04 s ( 38.3 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench sum_loop +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.92e-05 s ( 1.4 * Julia) +piconumpy.purepy : 3.61e-05 s ( 2.7 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 5.03e-04 s ( 38.0 * Julia) +piconumpy.cpython_capi : 2.90e-03 s (219.1 * Julia) +``` + +## Summary + +- PyPy is fast with list (1.3 * Julia, same order of magnitude than with Julia) +and as fast for a piconumpy array based on a list ("piconumpy.purepy", zero +cost abstraction!) + +- Numpy and _piconumpy_cpython_capi are both much slower with PyPy than with +Cpython. We can guess that the Numpy port to HPy would fix that. + +- piconumpy_hpy is a bit faster with PyPy (19 * Julia) than with CPython (40 * +Julia), however, we see that PyPy does not strongly accelerate piconumpy_hpy +(19 * Julia, 14 * piconumpy_list). 
+ +## Traces PyPy `sum_loop` + +### List + +``` ++557: label(p0, p1, p6, p9, f35, f30, p15, p22, p26, i32, i27, p29, descr=TargetToken(140447503809120)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++606: i44 = uint_ge(i32, i27) +guard_false(i44, descr=) [p0, p6, p9, p15, p1, i32, i27, i44, p26, f30, f35] ++615: f45 = getarrayitem_gc_f(p29, i32, descr=) ++622: i47 = int_add(i32, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++626: f48 = float_add(f35, f45) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++630: setfield_gc(p15, i47, descr=) ++634: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, f45, f48, None, None] ++634: i51 = getfield_raw_i(140447672379264, descr=) ++647: i53 = int_sub(i51, 1) ++651: setfield_raw(140447672379264, i53, descr=) ++654: i56 = int_lt(i53, 0) ++658: guard_false(i56, descr=) [p0, p6, p9, p15, p1, i53, f45, f48, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++664: i57 = arraylen_gc(p29, descr=) ++664: jump(p0, p1, p6, p9, f48, f45, p15, p22, p26, i47, i27, p29, descr=TargetToken(140447503809120)) +``` + +### piconumpy purepy (based on list) + +``` ++705: label(p0, p1, p6, p9, f53, f46, p15, p22, i49, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++760: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] ++760: p62 = force_token() ++760: enter_portal_frame(21, 28364) +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#0 LOAD_FAST') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#2 LOAD_ATTR') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#4 LOAD_FAST') +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#6 BINARY_SUBSCR') ++760: i65 = uint_ge(i49, i43) ++763: guard_false(i65, descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53] ++769: f66 = getarrayitem_gc_f(p45, i49, descr=) +debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#8 RETURN_VALUE') ++776: leave_portal_frame(21) ++776: i69 = int_add(i49, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++780: f70 = float_add(f53, f66) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++784: i72 = getfield_raw_i(139748871243648, descr=) ++797: i74 = int_sub(i72, 3) ++801: setfield_raw(139748871243648, i74, descr=) ++804: setfield_gc(p15, i69, descr=) ++808: i77 = int_lt(i74, 0) ++812: guard_false(i77, descr=) [p0, p6, p9, p15, p1, i74, f66, f70, None, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++818: i78 = arraylen_gc(p45, descr=) ++818: jump(p0, p1, p6, p9, f70, f66, p15, p22, i69, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776)) +``` + +### piconumpy hpy + +``` ++1339: label(p0, p1, p6, p9, f73, p63, p15, i68, 
p62, descr=TargetToken(139865876151520)) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++1352: p82 = getfield_gc_r(p15, descr=) ++1356: guard_nonnull_class(p82, 139866025815200, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1376: p84 = getfield_gc_r(p82, descr=) ++1387: guard_value(p84, ConstPtr(ptr85), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1396: guard_not_invalidated(descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1403: p87 = getfield_gc_r(ConstPtr(ptr86), descr=) ++1414: guard_value(p87, ConstPtr(ptr88), descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1423: i90 = getfield_gc_i(ConstPtr(ptr89), descr=) ++1427: i92 = int_lt(i68, 0) ++1431: guard_false(i92, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1444: i94 = getfield_gc_i(ConstPtr(ptr93), descr=) ++1448: i95 = int_is_zero(i94) ++1451: guard_false(i95, descr=) [p0, p6, p9, p63, p15, p1, p82, f73] ++1457: i97 = int_sub(i94, 1) ++1461: p99 = getfield_gc_r(ConstPtr(ptr98), descr=) ++1465: i100 = getarrayitem_gc_i(p99, i97, descr=) ++1470: i101 = arraylen_gc(p99, descr=) ++1474: i103 = int_rshift(i101, 1) ++1477: i105 = int_sub(i103, 5) ++1481: i106 = int_lt(i97, i105) ++1484: cond_call(i106, ConstClass(_ll_list_resize_hint_really_look_inside_iff__listPtr_Signed_Bool), ConstPtr(ptr108), i97, 0, descr=) ++1490: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, i97, f73] ++1490: setfield_gc(ConstPtr(ptr110), i97, descr=) ++1494: i112 = int_lt(i100, 0) ++1498: guard_false(i112, descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, f73] ++1522: setarrayitem_gc(p62, i100, p82, descr=) ++1527: p113 = force_token() ++1548: setfield_gc(p0, p113, descr=) ++1552: i115 = call_may_force_i(i90, 139866044538144, i100, i68, descr=) ++1663: guard_not_forced(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] ++1674: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73] ++1688: call_n(ConstClass(close), i100, descr=) ++1754: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] ++1768: i117 = int_is_true(i115) ++1771: guard_true(i117, descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73] ++1784: p119 = getfield_gc_r(ConstPtr(ptr118), descr=) ++1788: p120 = getarrayitem_gc_r(p119, i115, descr=) ++1793: call_n(ConstClass(close), i115, descr=) ++1866: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] ++1880: guard_nonnull_class(p120, ConstClass(W_FloatObject), descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73] ++1907: i123 = getfield_gc_i(p15, descr=) ++1918: i125 = int_add(i123, 1) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD') ++1923: setfield_gc(p15, i125, descr=) ++1927: f126 = getfield_gc_f(p120, descr=) ++1933: f127 = float_add(f73, f126) +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST') +debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE') ++1947: guard_not_invalidated(descr=) [p0, p6, p9, p120, p15, p1, f127, None, None, None] ++1947: i129 = getfield_raw_i(139866044675968, descr=) ++1960: i131 = int_sub(i129, 3) ++1964: setfield_raw(139866044675968, i131, descr=) ++1967: i134 = int_lt(i131, 0) ++1971: guard_false(i134, descr=) [p0, p6, p9, p120, p15, p1, i131, f127, None, None, None] +debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER') ++1977: i135 
= arraylen_gc(p119, descr=) ++1977: jump(p0, p1, p6, p9, f127, p120, p15, i125, p119, descr=TargetToken(139865876151520)) +``` diff --git a/bench/microbench_low_level/result_sum_loop_index.md b/bench/microbench_low_level/result_sum_loop_index.md new file mode 100644 index 0000000..fd63301 --- /dev/null +++ b/bench/microbench_low_level/result_sum_loop_index.md @@ -0,0 +1,55 @@ +# Microbenchmark sum_loop_index + +We measure the performance for this function: + +```python +def sum_loop_index(arr): + result = 0.0 + for index in range(5000): + result += arr[index] + return result +``` + +One can run the benchmarks with `make bench_sum_loop_index`. + +With PyPy3.7, I get: + +``` +bench sum_loop_index +hostname: voyage +{'cache_tag': 'pypy37', + 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)} +list : 1.19e-05 s ( 2.0 * Julia) +piconumpy.purepy : 1.64e-05 s ( 2.8 * Julia) +numpy : 4.18e-03 s (711.4 * Julia) +piconumpy.hpy : 1.73e-04 s ( 29.4 * Julia) +piconumpy.cpython_capi : 8.44e-04 s (143.8 * Julia) +``` + +With CPython: + +``` +bench sum_loop_index +hostname: voyage +{'cache_tag': 'cpython-39', + 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)} +list : 3.91e-04 s ( 66.5 * Julia) +piconumpy.purepy : 1.11e-03 s (188.3 * Julia) +numpy : 8.93e-04 s (152.1 * Julia) +piconumpy.hpy : 5.42e-04 s ( 92.3 * Julia) +piconumpy.cpython_capi : 4.17e-04 s ( 71.0 * Julia) +``` + +With Python 3.8.5 (GraalVM CE Native 21.3.0) + +``` +bench sum_loop_index +hostname: voyage +{'cache_tag': 'graalpython-38', + 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)} +list : 1.36e-05 s ( 2.3 * Julia) +piconumpy.purepy : 1.81e-05 s ( 3.1 * Julia) +numpy : ImportError numpy +piconumpy.hpy : 3.68e-05 s ( 6.3 * Julia) +piconumpy.cpython_capi : 1.08e-04 s ( 18.5 * Julia) +``` diff --git a/bench/profile_piconumpy.py b/bench/profile_piconumpy.py index b7de388..3bde5ae 100644 --- a/bench/profile_piconumpy.py +++ b/bench/profile_piconumpy.py @@ -7,12 +7,14 @@ import tmp_purepy import tmp_purepy_array import tmp_cython +import tmp_hpy_universal methods = { "cpython-c-api": bench_array1d, "purepy": tmp_purepy, "purepy_array": tmp_purepy_array, "cython": tmp_cython, + "universal": tmp_hpy_universal, } module = methods.get(sys.argv[-1], bench_array1d) diff --git a/piconumpy/bench.py b/piconumpy/bench.py index a704e5f..f5d4d8d 100644 --- a/piconumpy/bench.py +++ b/piconumpy/bench.py @@ -11,6 +11,13 @@ def timeit_verbose( print_time=False, max_length_name=33, ): + if name is None: + name = stmt.split("(")[0] + + fmt_name = f"{{:{max_length_name}s}}" + name = fmt_name.format(name) + print(f"{name}:", end="", flush=True) + result = timeit( stmt, setup=setup, total_duration=total_duration, globals=globals ) @@ -20,18 +27,12 @@ def timeit_verbose( else: norm_given = True - if name is None: - name = stmt.split("(")[0] - - fmt_name = f"{{:{max_length_name}s}}" - name = fmt_name.format(name) - if print_time: raw_time = f" = {result:7.3g} s" else: raw_time = "" - print(f"{name}: {result/norm:5.3g} * norm{raw_time}") + print(f" {result/norm:5.3g} * norm{raw_time}") if not norm_given and not print_time: print(f"norm = {norm:5.3g} s") diff --git a/piconumpy/purepy.py b/piconumpy/purepy.py index a84ad31..bfa4b03 100644 --- a/piconumpy/purepy.py +++ b/piconumpy/purepy.py @@ -2,7 +2,7 @@ class array: __slots__ = ["data", "size"] def __init__(self, data): - self.data = list(float(number) for number in data) + self.data = list(data) 
self.size = len(self.data) def __add__(self, other): @@ -30,9 +30,10 @@ def __getitem__(self, index): def __setitem__(self, index, value): self.data[index] = value + def empty(size): - return array([0]*size) + return array([0] * size) -def zeros(size): - return array([0]*size) +def zeros(size): + return array([0] * size) diff --git a/piconumpy/purepy_array.py b/piconumpy/purepy_array.py index ba801a2..7306cff 100644 --- a/piconumpy/purepy_array.py +++ b/piconumpy/purepy_array.py @@ -23,8 +23,10 @@ def __mul__(self, other): def __truediv__(self, other): return self.__class__(number / other for number in self) + def empty(size): - return array([0]*size) + return array([0] * size) + def zeros(size): - return array([0]*size) + return array([0] * size) diff --git a/piconumpy/test_cpython_capi.py b/piconumpy/test_cpython_capi.py index a1638dc..cedbed5 100644 --- a/piconumpy/test_cpython_capi.py +++ b/piconumpy/test_cpython_capi.py @@ -6,6 +6,7 @@ class Tests: piconumpy = _piconumpy_cpython_capi + def _array(self, *args): return self.piconumpy.array(*args) diff --git a/piconumpy/test_cython.py b/piconumpy/test_cython.py index 44cf1c5..438adc7 100644 --- a/piconumpy/test_cython.py +++ b/piconumpy/test_cython.py @@ -1,4 +1,5 @@ from .test_cpython_capi import Tests as _Tests + class Tests(_Tests): from . import _piconumpy_cython as piconumpy diff --git a/piconumpy/test_hpy_universal.py b/piconumpy/test_hpy_universal.py index 358f037..2a470ca 100644 --- a/piconumpy/test_hpy_universal.py +++ b/piconumpy/test_hpy_universal.py @@ -1,16 +1,31 @@ +import sys + import pytest +from .util_hpy import import_ext from .test_cpython_capi import Tests as _Tests try: - from . import _piconumpy_hpy + piconumpy_universal = import_ext() except ImportError: - _piconumpy_hpy = False + piconumpy_universal = False @pytest.mark.skipif( - not _piconumpy_hpy, reason="ImportError piconumpy HPy Universal" + not piconumpy_universal, reason="ImportError piconumpy HPy Universal" ) class TestsCPyABI(_Tests): - piconumpy = _piconumpy_hpy + piconumpy = piconumpy_universal + + def test_multiply(self): + if sys.implementation.name == "pypy": + pytest.xfail("Expected failure with PyPy (but should work)") + + super().test_multiply() + + def test_add(self): + if sys.implementation.name == "pypy": + pytest.xfail("Expected failure with PyPy (but should work)") + + super().test_add() diff --git a/piconumpy/test_purepy.py b/piconumpy/test_purepy.py index 0793611..e7320e0 100644 --- a/piconumpy/test_purepy.py +++ b/piconumpy/test_purepy.py @@ -1,4 +1,5 @@ from .test_cpython_capi import Tests as _Tests + class Tests(_Tests): from . import purepy as piconumpy diff --git a/piconumpy/test_purepy_array.py b/piconumpy/test_purepy_array.py index b41a8b7..4c3da8c 100644 --- a/piconumpy/test_purepy_array.py +++ b/piconumpy/test_purepy_array.py @@ -1,4 +1,5 @@ from .test_cpython_capi import Tests as _Tests + class Tests(_Tests): from . 
import purepy_array as piconumpy diff --git a/piconumpy/util_hpy.py b/piconumpy/util_hpy.py new file mode 100644 index 0000000..1fbc47c --- /dev/null +++ b/piconumpy/util_hpy.py @@ -0,0 +1,21 @@ +from importlib.util import spec_from_file_location +from pathlib import Path + +from hpy.universal import load + + +def import_from_path(path): + name_ext = "_piconumpy_hpy" + ext_filepath = str(path) + spec = spec_from_file_location(name_ext, ext_filepath) + m = load(name_ext, ext_filepath, spec) + m.__file__ = ext_filepath + m.__loader__ = __loader__ + m.__name__ = __name__ + m.__package__ = __package__ + return m + + +def import_ext(): + path = Path(__file__).parent / "_piconumpy_hpy.hpy0.so" + return import_from_path(path) diff --git a/pyproject.toml b/pyproject.toml index 3234fad..ff6b793 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,10 +5,12 @@ description = "An experiment about Numpy and pyhandle/hpy." authors = [ {name = "Pierre Augier", email = "pierre.augier@univ-grenoble-alpes.fr"}, ] -license = {text = "BSD 3-Clause"} +license = "BSD-3-Clause" +license-files = ["LICENSE"] readme = "README.md" keywords = ["numpy", "hpy", "PyPy"] requires-python = ">=3.8" +dependencies = ["hpy>=0.9.0; implementation_name == 'cpython'"] [project.urls] homepage = "https://github.com/paugier/piconumpy" @@ -16,15 +18,20 @@ repository = "https://github.com/paugier/piconumpy" documentation = "https://github.com/paugier/piconumpy" [project.optional-dependencies] -dev = ['transonic', 'numpy', 'pytest', 'pythran'] -full = ['black'] +test = ["pytest", "numpy"] +# pythran 0.18.0 needed but not yet on PyPI +# (see https://github.com/serge-sans-paille/pythran/pull/2310#issuecomment-2871805768) +bench = ['transonic', 'numpy', 'pythran@git+https://github.com/serge-sans-paille/pythran.git@0.18.0'] +profile = ["gprof2dot"] +format = ['black'] +full = ["piconumpy[test,bench,profile,format]"] [build-system] requires = [ - "setuptools >= 35.0.2", + "setuptools>=35.0.2", "wheel", "cython", - "hpy >= 0.9.0" + "hpy>=0.9.0; implementation_name == 'cpython'" ] [tool.black]
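For reference, a minimal usage sketch of the new `piconumpy/util_hpy.py` helper, mirroring how `make_bench_piconumpy.py` and `test_hpy_universal.py` use it. It assumes the universal-ABI extension has been built, e.g. with `pip install -e . --config-settings="--global-option=--hpy-abi=universal"`; the helper is needed because `_piconumpy_hpy.hpy0.so` is loaded through `hpy.universal.load` rather than the regular import machinery:

```python
from piconumpy.util_hpy import import_ext

# loads piconumpy/_piconumpy_hpy.hpy0.so through hpy.universal.load
ext = import_ext()

a = ext.array([1.0, 2.0, 3.0])
# element-wise operations supported by the array class
# (note: marked xfail on PyPy in test_hpy_universal.py)
b = a * 2.0 + a
print(len(b), b[0])
```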