diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 241dd84..e24b666 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -8,43 +8,43 @@ jobs:
strategy:
max-parallel: 5
matrix:
- python-version: ['3.8', '3.9', '3.10']
+ python-version: ['3.11', '3.12', 'pypy-3.11']
steps:
- name: Setup Julia
- uses: julia-actions/setup-julia@v1
+ uses: julia-actions/setup-julia@v2
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- - name: Install dependencies
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Build and install deps
run: |
- # install HPy from source if depending on a dev version
- # git clone -b master --single-branch https://github.com/hpyproject/hpy
- # git checkout 1234abcd
- # cd hpy
- # pip install .
- pip install numpy cython pytest transonic pythran 'setuptools>=60.2' 'hpy>=0.9.0rc1'
+ pip install -e .[full]
- - name: Checkout
- uses: actions/checkout@v3
- with:
- fetch-depth: 0
+ - if: startsWith(matrix.python-version, 'pypy') != true
+ name: Build universal extension (only needed for CPython)
+ run: |
+ pip install -e . --config-settings="--global-option=--hpy-abi=universal"
- - name: build
+ - name: Remove _piconumpy_hpy.py
run: |
- python setup.py develop
- python setup.py --hpy-abi=universal develop
+ rm -f piconumpy/_piconumpy_hpy.py
- name: Run tests
run: |
- pytest -s
+ pytest -v
- name: Run bench
run: |
cd bench
+ make tmp_result_julia.txt
+ make bench_hpy
+ make bench_full
+ # rerun bench_hpy to get these results also at the end
make bench_hpy
- make
diff --git a/.gitignore b/.gitignore
index 9a709bb..7a37679 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,5 +9,10 @@ build
**/tmp*.*
**/tmp*.*
+**/tmp/*
-*_cython.c
\ No newline at end of file
+*_cython.c
+
+piconumpy/_piconumpy_hpy.py
+
+.venv*
diff --git a/.mdformat.toml b/.mdformat.toml
new file mode 100644
index 0000000..972483a
--- /dev/null
+++ b/.mdformat.toml
@@ -0,0 +1,3 @@
+wrap = 89
+number = true
+end_of_line = "lf"
diff --git a/LICENSE b/LICENSE
index 44b8153..17e7869 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
BSD 3-Clause License
-Copyright (c) 2020, Pierre Augier
+Copyright (c) 2020-2025, Pierre Augier
Copyright (c) 2021, 2023, Oracle and/or its affiliates
All rights reserved.
diff --git a/Makefile b/Makefile
index f7f7c54..92877da 100644
--- a/Makefile
+++ b/Makefile
@@ -1,38 +1,57 @@
ifeq ($(PYTHON),)
-PYTHON := python
+PYTHON := python3
endif
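+# Python implementation name (cpython, pypy, graalpy, ...); used to build the
+# CPython-ABI extension only on CPython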
+IMPLEMENTATION := $(shell $(PYTHON) -c "import sys; print(sys.implementation.name)")
+
+
all:
- make develop_universal
-ifeq ($(PYTHON),python)
- make build_ext
+ make editable_universal
+ifeq ($(IMPLEMENTATION),cpython)
+ make editable
endif
+
+rm_hpy_py:
+ rm -f piconumpy/_piconumpy_hpy.py
+
+editable:
+ $(PYTHON) -m pip install -e .
+ make rm_hpy_py
+
+editable_universal:
+ $(PYTHON) -m pip install -e . --config-settings="--global-option=--hpy-abi=universal"
+ make rm_hpy_py
+
+editable_full:
+ $(PYTHON) -m pip install -e .[full]
+ make rm_hpy_py
+
+
+# deprecated but let's keep them
develop:
$(PYTHON) setup.py develop
+ make rm_hpy_py
develop_universal:
$(PYTHON) setup.py --hpy-abi=universal develop
- rm -f piconumpy/_piconumpy_hpy.py
-
-pip:
- $(PYTHON) -m pip install -e .[dev]
+ make rm_hpy_py
build_ext_universal:
$(PYTHON) setup.py --hpy-abi=universal build_ext -if
+ make rm_hpy_py
build_ext:
$(PYTHON) setup.py build_ext -if
+ make rm_hpy_py
-full:
- $(PYTHON) -m pip install -e .[full]
format:
black -l 82 setup.py piconumpy/*.py
clang-format-7 -i piconumpy/*cpython_capi.c
-tests:
+tests: rm_hpy_py
$(PYTHON) -m pytest piconumpy -s
clean:
@@ -40,4 +59,21 @@ clean:
rm -rf build dist piconumpy.egg-info
black:
- black -l 82 .
\ No newline at end of file
+ black -l 82 .
+
+
+install_pypy:
+ uv python install pypy
+
+install_graalpy:
+ uv python install graalpy
+
+create_venv_cpy:
+ $(PYTHON) -m venv .venv_cpy --upgrade-deps
+
+create_venv_pypy:
+ $(shell uv python find pypy) -m venv .venv_pypy --upgrade-deps
+
+create_venv_graalpy:
+ # cannot use --upgrade-deps because pip is patched for GraalPy
+ $(shell uv python find graalpy) -m venv .venv_graalpy
diff --git a/README.md b/README.md
index a0bad3d..f473ec5 100644
--- a/README.md
+++ b/README.md
@@ -5,81 +5,64 @@
**An experiment about Numpy and HPy**
The C API of CPython is one of the causes of the success of Python in scientific
-computing. In particular, Numpy (and all the Python scientific stack) is built
-on top of this API. However, some characteristics of this API start to be an
-issue for the future of scientific Python (see [1], [2], [HPy]).
+computing. In particular, Numpy (and the whole Python scientific stack) is built on top
+of this API. However, some characteristics of this API are becoming an issue for the
+future of scientific Python (see [1], [2], [HPy]).
-[1]: https://faster-cpython.readthedocs.io/
-[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html
-[HPy]: https://github.com/hpyproject/hpy
-
-[HPy] is a very ambitious and promissing project to design a new and better C
-API for interacting with Python interpreters. It should allow people to write
-Python extensions efficient on different interpreters (CPython, PyPy, Jython,
-IronPython, GraalPython, RustPython, etc.).
+[HPy] is a very ambitious and promising project to design a new and better C API for
+interacting with Python interpreters. It should allow people to write Python extensions
+that are efficient on different interpreters (CPython, PyPy, Jython, IronPython,
+GraalPython, RustPython, etc.).
-PyPy would be especially useful for some scientific applications. For example
-for Integration and ODEs
-([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)),
-for which there are a lot of callbacks of very small functions. This repository
-contains [a tiny benchmark](bench/without_numpy) showing that as long as Numpy
-is not used, PyPy is very efficient for such task. Unfortunately, as soon as
-Numpy is used, PyPy becomes very slow!
+PyPy would be especially useful for some scientific applications, for example for
+integration and ODEs
+([scipy.integrate](https://docs.scipy.org/doc/scipy/reference/integrate.html)), which
+involve a lot of callbacks of very small functions. This repository contains
+[a tiny benchmark](bench/without_numpy) showing that as long as Numpy is not used, PyPy
+is very efficient for such tasks. Unfortunately, as soon as Numpy is used, PyPy becomes
+very slow!
-[bench/without_numpy]: https://github.com/paugier/piconumpy/blob/master/bench/without_numpy/
+With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and callbacks
+of small Python functions.
-With PicoNumpy, I'd like to study if [HPy] could help for codes using Numpy and
-callbacks of small Python functions.
+We start with a [simple but realistic benchmark](bench/bench_array1d.py) (the slow loops
+only involve pure Python and very simple Numpy). We then wrote a tiny ("pico")
+implementation of a Numpy-like object (just sufficient to run the benchmark).
-We start by a [simple but realistic benchmark](bench/bench_array1d.py) (the
-slow loops only involve pure-Python and very simple Numpy). We then wrote a
-tiny ("pico") implementation of a Numpy like object (just sufficient to run the
-benchmark).
+The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy could
+efficiently accelerate [our main benchmark](bench/bench_array1d.py).
-The next task is to reimplement PicoNumpy using [HPy] and to check if PyPy
-could efficiently accelerate [our main benchmark](bench/bench_array1d.py).
-
-PicoNumpy is really tiny. It just provides an `array` class (one-dimensional)
-supporting:
+PicoNumpy is really tiny. It just provides an `array` class (one-dimensional) supporting:
- Instantiation from a list of floats
-- Elementwise multiplication and division by a float
-- Elementwise addition (of 2 arrays)
+- Element-wise multiplication and division by a float
+- Element-wise addition (of 2 arrays)
- Indexing
- `len`
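+
+For example (a small usage sketch; the `purepy`, `purepy_array`, `cython` and `hpy`
+variants expose the same interface, as exercised by the tests):
+
+```python
+from piconumpy import array  # default (CPython C-API) implementation
+
+a = array([1.0, 2.0, 3.0])
+b = (a + a) * 2.0 / 4.0  # element-wise operations returning new arrays
+print(len(b), b[0])  # -> 3 1.0
+```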
-A good acceleration by PyPy of our example would be a great proof that the
-scientific Python community has to invest time and energy on [HPy].
-
-In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for
-the benchmark and comparison. With Transonic-Pythran, we typically get a 50
-speedup compared to CPython (and ~400 versus PyPy, which is still very slow for
-such codes using Numpy).
+A good acceleration of our example by PyPy would be great proof that the scientific
+Python community should invest time and energy in [HPy].
-[bench/bench_array1d.py]: https://github.com/paugier/piconumpy/blob/master/bench/bench_array1d.py
+In the script [bench_array1d.py](bench/bench_array1d.py), Transonic is used for the
+benchmark and comparison. With Transonic-Pythran, we typically get a 50x speed-up
+compared to CPython (and ~400x versus PyPy, which is still very slow for such codes
+using Numpy).
## Install and run the benchmarks
-**Warning:** PicoNumpy now depends on HPy, which still has to be installed from
-the [Git repository](https://github.com/hpyproject/hpy). For now, the
-installation is a bit more complex that what is described here (more about this
-[here](#more-precise-notes-on-how-to-install-and-run-the-benchmarks-with-PyPy)).
-
-`make` should install the package in editable mode. `cd bench; make` should run
-the benchmarks. For the benchmarks, Julia is used for a good comparison point
-so the command `julia` has to be available.
+`pip install -e .[full]` should build and install the package in editable mode, together
+with all the dependencies necessary for testing, benchmarking and profiling.
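+
+To also build the HPy "universal" ABI version of the extension (as done by the
+`editable_universal` target of the Makefile), one can run
+`pip install -e . --config-settings="--global-option=--hpy-abi=universal"`.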
-For PyPy, the Makefiles are sensible to the environment variable `PYTHON`, so
-you could do:
+For the benchmarks, Julia is used as a good comparison point, so the command `julia` has
+to be available. Different benchmarks can be run with:
-```bash
-export PYTHON=pypy3
-make
+```sh
cd bench
-make
+make clean
+make bench_hpy
+make bench_full
```
-The benchmark code can be profiled for the different implementations with the
+The benchmark code can be profiled for the different piconumpy implementations with the
commands (you need gprof2dot and graphviz):
```bash
@@ -90,97 +73,192 @@ make profile METHOD="purepy"
make profile METHOD="cython"
```
-### More precise notes on how to install and run the benchmarks with PyPy
+### Notes on PyPy
-Download and extract a nightly PyPy build
-. Add to the `PATH` environment variable
-the path of the directory containing the `pypy` executable (something like
-`~/opt/pypy-c-jit-101190-b661dc329618-linux64/bin`). Then, you should be able
-to run:
+PyPy can be installed with uv or downloaded manually (for example from
+ for a nightly build).
-```bash
-pypy -m ensurepip
-pypy -m pip install pip -U
-pypy -m pip install numpy cython pytest transonic pythran
+With uv, one can run
+
+```sh
+uv python install pypy
+```
+
+and then get the path to the `pypy` executable with:
+
+```sh
+uv python find pypy
```
-We need to install the correct version of HPy for the version of PyPy we are using:
+which can give something like
+`~/.local/share/uv/python/pypy-3.11.11-linux-x86_64-gnu/bin/pypy`.
+
+Then, you should be able to create a virtual environment, activate it, and build and
+install PicoNumpy with
```bash
-pypy -c "import hpy.universal as u; print(u.get_version())"
+cd ~/dev/piconumpy
+$(uv python find pypy) -m venv .venv_pypy --upgrade-deps
+. .venv_pypy/bin/activate
+pip install -e .[full]
```
-gives `('0.0.2rc2.dev12+gc9660c2', 'c9660c2')`.
+and run the benchmarks with:
```bash
-cd ~/Dev/hpy
-# update to the correct commit
-pypy setup.py develop
+cd bench
+make clean
+make bench_hpy
+make bench_full
```
-Now we can build-install PicoNumpy:
+Note that one can check which HPy version is vendored with PyPy:
```bash
-cd ~/Dev/piconumpy
-pypy setup.py --hpy-abi=universal develop
+python -c "import hpy.universal as u; print(u.get_version())"
```
-And run the benchmarks with:
+### Notes on GraalPy
+
+GraalPy can be installed with uv:
+
+```sh
+uv python install graalpy
+```
+
+Then, one can run
+
+```sh
+cd ~/dev/piconumpy
+# cannot use --upgrade-deps because pip is patched for GraalPy
+$(uv python find graalpy) -m venv .venv_graalpy
+. .venv_graalpy/bin/activate
+# we don't try to run the full benchmarks using Pythran on GraalPy
+pip install -e .[test,profile]
+```
+
+and run the benchmarks with:
```bash
-export PYTHON="pypy"
+cd bench
make clean
make bench_hpy
-make
```
## Few results
-As of today (6 July 2021), HPy is not yet ready for high performance, but at
-least (with HPy 0.0.2) it runs !
-
-### At home (Intel(R) Core(TM) i5-8400 CPU @ 2.80GHz)
+### Full benchmarks
- With CPython
```
-Julia : 1 * norm = 0.00196 s
-PicoNumpy (CPython C-API) : 9.42 * norm
-PicoNumpy (HPy CPy ABI) : 9.95 * norm
-PicoNumpy (HPy Universal) : 10.4 * norm
-Transonic-Pythran : 0.497 * norm
-Numpy : 27.5 * norm
-PicoNumpy (purepy) : 37.3 * norm
-PicoNumpy (purepy_array) : 37.7 * norm
-PicoNumpy (Cython) : 28.9 * norm
+{'cache_tag': 'cpython-311',
+ 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)}
+hostname: meige7ltpa212
+Julia : 1 * norm = 0.0129 s
+PicoNumpy (CPython C-API) : 6.55 * norm
+PicoNumpy (HPy CPy ABI) : 7.46 * norm
+PicoNumpy (HPy Universal) : 7.92 * norm
+Transonic-Pythran : 0.581 * norm
+Numpy : 27.1 * norm
+PicoNumpy (purepy) : 18.8 * norm
+PicoNumpy (purepy_array) : 31.7 * norm
+PicoNumpy (Cython) : 23.3 * norm
```
- With PyPy3
```
-Julia : 1 * norm = 0.00196 s
-PicoNumpy (CPython C-API) : 34.1 * norm
-PicoNumpy (HPy Universal) : 12.8 * norm
-Transonic-Pythran : 0.539 * norm
-Numpy : 232 * norm
-PicoNumpy (purepy) : 4.39 * norm
-PicoNumpy (purepy_array) : 6.33 * norm
-PicoNumpy (Cython) : 274 * norm
+{'cache_tag': 'pypy311',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)}
+hostname: meige7ltpa212
+Julia : 1 * norm = 0.0129 s
+PicoNumpy (CPython C-API) : 35.5 * norm
+PicoNumpy (HPy Universal) : 44.7 * norm
+Transonic-Pythran : 0.609 * norm
+Numpy : 168 * norm
+PicoNumpy (purepy) : 2.98 * norm
+PicoNumpy (purepy_array) : 8.7 * norm
+PicoNumpy (Cython) : 288 * norm
```
-#### Simpler benchmarks (bench/bench_cpy_vs_hpy.py)
+Discussion: PyPy with HPy universal is really too slow (44.7x slower than Julia, 6x
+slower than CPython with its C-API, and even a bit slower than PyPy with cpyext!). This
+is a big issue for HPy!
+
+A reasonable target would be to be as fast as CPython with its C-API...
+
+Profiling shows that the issue is related to slow element-wise operations, as in the
+micro-benchmark:
+
+```sh
+cd microbench_low_level
+make bench_element_wise
+```
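+
+The kernel measured by this micro-benchmark is essentially one explicit Runge-Kutta-like
+step built only from element-wise operations on small arrays. A condensed sketch
+(mirroring `bench/microbench_low_level/bench.py`; `numpy` is used here just to make the
+snippet self-contained, the benchmark itself runs the kernel with the different piconumpy
+implementations):
+
+```python
+import numpy as np
+
+
+def element_wise(arr):
+    dt = 0.1
+    x0 = arr
+    k1 = x0 * dt
+    k2 = (x0 + k1 / 2) * dt
+    k3 = (x0 + k2 / 2) * dt
+    k4 = (x0 + k3) * dt
+    # `k2 * 2` instead of `2 * k2` works around a PyPy bug (see the comment in bench.py)
+    return x0 + (k1 + k2 * 2 + k3 * 2 + k4) / 6
+
+
+print(element_wise(np.array([1.0, 2.0, 3.0, 4.0])))
+```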
- With CPython
+```
+bench element_wise
+hostname: meige7ltpa212
+{'cache_tag': 'cpython-311',
+ 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)}
+piconumpy.purepy : 7.88e-06 s ( 21.9 * Julia)
+numpy : 7.88e-06 s ( 21.9 * Julia)
+piconumpy.hpy (universal) : 1.34e-06 s ( 3.7 * Julia)
+piconumpy.cpython_capi : 6.12e-07 s ( 1.7 * Julia)
```
-CPython C-API: 1.92 seconds
-HPy [Universal]: 2.08 seconds
-HPy [CPy ABI]: 2.02 seconds
+
+- With PyPy3
+
+```
+bench element_wise
+hostname: meige7ltpa212
+{'cache_tag': 'pypy311',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)}
+piconumpy.purepy : 1.46e-06 s ( 4.1 * Julia)
+numpy : 4.39e-05 s (121.9 * Julia)
+piconumpy.hpy (universal) : 4.27e-06 s ( 11.9 * Julia)
+piconumpy.cpython_capi : 1.84e-06 s ( 5.1 * Julia)
+```
+
+### Simpler benchmarks (bench/bench_cpy_vs_hpy.py)
+
+- With CPython
+
+```
+{'cache_tag': 'cpython-311',
+ 'version': sys.version_info(major=3, minor=11, micro=2, releaselevel='final', serial=0)}
+hostname: meige7ltpa212
+Julia: 0.013 seconds
+CPython C-API: 0.084 seconds ( 6.5 * Julia)
+HPy [Universal]: 0.102 seconds ( 7.9 * Julia)
+HPy [CPy ABI]: 0.096 seconds ( 7.4 * Julia)
```
- With PyPy3
```
-CPython C-API: 5.75 seconds
-HPy [Universal]: 2.11 seconds
+{'cache_tag': 'pypy311',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=19, releaselevel='final', serial=0)}
+hostname: meige7ltpa212
+Julia: 0.013 seconds
+CPython C-API: 0.382 seconds (29.6 * Julia)
+HPy [Universal]: 0.487 seconds (37.6 * Julia)
+Python list: 0.037 seconds ( 2.9 * Julia)
```
+
+- GraalPy
+
+```
+{'cache_tag': 'graalpy242-311',
+ 'version': sys.version_info(major=3, minor=11, micro=7, releaselevel='final', serial=0)}
+hostname: meige7ltpa212
+Julia: 0.013 seconds
+CPython C-API: 2.123 seconds (164.2 * Julia)
+HPy [Universal]: 1.541 seconds (119.2 * Julia)
+Python list: 0.542 seconds (41.9 * Julia)
+```
+
+[1]: https://faster-cpython.readthedocs.io/
+[2]: https://morepypy.blogspot.com/2019/12/hpy-kick-off-sprint-report.html
+[hpy]: https://github.com/hpyproject/hpy
diff --git a/bench/Makefile b/bench/Makefile
index 7da6e64..eb4c4d4 100644
--- a/bench/Makefile
+++ b/bench/Makefile
@@ -7,7 +7,7 @@ ifeq ($(METHOD),)
METHOD := cpython-c-api
endif
-all: tmp.py tmp_result_julia.txt
+bench_full: rm_hpy_py tmp.py tmp_result_julia.txt
$(PYTHON) tmp.py
tmp.py: bench_array1d.py make_bench_piconumpy.py
@@ -20,11 +20,14 @@ clean:
tmp_result_julia.txt:
julia bench.jl > tmp_result_julia.txt
-profile: tmp.py
+profile: rm_hpy_py tmp.py
$(PYTHON) profile_piconumpy.py $(METHOD)
# with gprof2dot and graphviz (command dot)
gprof2dot -f pstats tmp.pstats | dot -Tpng -o tmp_$(METHOD).png
eog tmp_$(METHOD).png
-bench_hpy:
+bench_hpy: rm_hpy_py
$(PYTHON) bench_cpy_vs_hpy.py
+
+rm_hpy_py:
+ rm -f ../piconumpy/_piconumpy_hpy.py
diff --git a/bench/bench.jl b/bench/bench.jl
index 00cedff..bd98571 100644
--- a/bench/bench.jl
+++ b/bench/bench.jl
@@ -65,7 +65,7 @@ function bench(n_sleds, n_time)
end
-n_sleds = 10
+n_sleds = 100
n_time = 200
nb_runs = 200
diff --git a/bench/bench_array1d.py b/bench/bench_array1d.py
index a73a635..ba4426f 100644
--- a/bench/bench_array1d.py
+++ b/bench/bench_array1d.py
@@ -1,9 +1,14 @@
+import sys
+
import numpy as np
from numpy import array
from math import pi, cos, sin
-from transonic import jit
+from transonic import jit, wait_for_all_extensions
+
+IS_CPY = sys.implementation.name == "cpython"
+IS_PYPY = sys.implementation.name == "pypy"
# begin code functions (don't remove this line)
@@ -75,15 +80,15 @@ def bench(n_sleds, n_time):
# end code functions (don't remove this line)
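+# Transonic-Pythran is only used on CPython and PyPy; the full benchmark with
+# Pythran is not run on GraalPy (see the README).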
+if IS_CPY or IS_PYPY:
-bench_pythran = jit(bench)
-# Numba does not support this code...
-# bench_numba = jit(backend="numba")(bench)
-from transonic import wait_for_all_extensions
+ bench_pythran = jit(bench)
+ # Numba does not support this code...
+ # bench_numba = jit(backend="numba")(bench)
-# warmup (compilation of the Pythran extension)
-bench_pythran(1, 1)
-wait_for_all_extensions()
+ # warmup (compilation of the Pythran extension)
+ bench_pythran(1, 1)
+ wait_for_all_extensions()
if __name__ == "__main__":
diff --git a/bench/bench_cpy_vs_hpy.py b/bench/bench_cpy_vs_hpy.py
index 1b36278..1bb35dd 100644
--- a/bench/bench_cpy_vs_hpy.py
+++ b/bench/bench_cpy_vs_hpy.py
@@ -1,8 +1,11 @@
-import sys
-import time
import random
+import socket
+import sys
+
from math import pi, cos, sin
from pathlib import Path
+from pprint import pprint
+from time import perf_counter
here = Path(__file__).absolute().parent
@@ -14,7 +17,7 @@ def my_randn(mod, n):
return result
-IS_PYPY = hasattr(sys, "pypy_version_info")
+IS_CPY = sys.implementation.name == "cpython"
def runge_kutta_step(mod, f, x0, dt, t=None):
@@ -75,14 +78,18 @@ def bench(mod, n_sleds, n_time):
u_init = mod.zeros(n_sleds)
for i in range(n_sleds):
u_init[i] += 3.5
- start = time.time()
- solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time)
- end = time.time()
- return end - start
+ times = []
+ for _ in range(20):
+ start = perf_counter()
+ solver(mod, board, x_init, y_init, u_init, v_init, 0.01, n_time)
+ times.append(perf_counter() - start)
+
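+    # report the median of the 20 runs to reduce timing noise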
+ times.sort()
+ return times[len(times) // 2]
N_SLEDS = 100
-N_TIME = 2000
+N_TIME = 200
def import_piconumpy_hpy_universal():
@@ -101,18 +108,48 @@ def main():
import piconumpy._piconumpy_cpython_capi as pnp_capi
- t = bench(pnp_capi, N_SLEDS, N_TIME)
- print(f"CPython C-API: {t:.2f} seconds")
+ pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")})
+ print(f"hostname: {socket.gethostname()}")
+
+ tmp_result_julia = Path("tmp_result_julia.txt")
+ if tmp_result_julia.exists():
+        with open(tmp_result_julia) as file:
+ norm = float(file.read())
+ end = ""
+ print(f"Julia: {norm:.3f} seconds")
+ else:
+ norm = False
+ end = "\n"
+
+ t_capi = bench(pnp_capi, N_SLEDS, N_TIME)
+ print(f"CPython C-API: {t_capi:.3f} seconds", end=end)
+ if norm:
+ print(f" ({t_capi/norm:4.1f} * Julia)")
pnp_hpy_universal = import_piconumpy_hpy_universal()
- t = bench(pnp_hpy_universal, N_SLEDS, N_TIME)
- print(f"HPy [Universal]: {t:.2f} seconds")
+ t_hpy_univ = bench(pnp_hpy_universal, N_SLEDS, N_TIME)
+ print(f"HPy [Universal]: {t_hpy_univ:.3f} seconds", end=end)
- if not IS_PYPY:
+ if norm:
+ print(f" ({t_hpy_univ/norm:4.1f} * Julia)")
+
+ if IS_CPY:
import piconumpy._piconumpy_hpy as pnp_hpy
- t = bench(pnp_hpy, N_SLEDS, N_TIME)
- print(f"HPy [CPy ABI]: {t:.2f} seconds")
+ t_hpy_cpy_abi = bench(pnp_hpy, N_SLEDS, N_TIME)
+ print(f"HPy [CPy ABI]: {t_hpy_cpy_abi:.3f} seconds", end=end)
+
+ if norm:
+ print(f" ({t_hpy_cpy_abi/norm:4.1f} * Julia)")
+
+ if not IS_CPY:
+ import piconumpy.purepy as pnp_with_list
+
+ t_with_list = bench(pnp_with_list, N_SLEDS, N_TIME)
+ print(f"Python list: {t_with_list:.3f} seconds", end=end)
+
+ if norm:
+ print(f" ({t_with_list/norm:4.1f} * Julia)")
if __name__ == "__main__":
diff --git a/bench/make_bench_piconumpy.py b/bench/make_bench_piconumpy.py
index 4fbf5c0..4f92bcc 100644
--- a/bench/make_bench_piconumpy.py
+++ b/bench/make_bench_piconumpy.py
@@ -9,8 +9,9 @@ def create_tmp_file(name_module):
if name_module == "_piconumpy_hpy_universal":
code_import = """
-from piconumpy import _piconumpy_hpy
-array = _piconumpy_hpy.array
+from piconumpy.util_hpy import import_ext
+ext = import_ext()
+array = ext.array
"""
else:
code_import = f"from piconumpy.{name_module} import array"
@@ -42,12 +43,19 @@ def create_tmp_file(name_module):
code = (
"""
+import socket
import sys
+
+from math import pi, cos, sin
+from pathlib import Path
+from pprint import pprint
+
import numpy as np
+
from piconumpy import array
-from math import pi, cos, sin
-IS_PYPY = hasattr(sys, 'pypy_version_info')
+IS_CPY = sys.implementation.name == "cpython"
+
"""
+ code_functions
+ """
@@ -61,12 +69,16 @@ def create_tmp_file(name_module):
from tmp_purepy_array import bench as bench_piconumpy_purepy_array
from tmp_cython import bench as bench_cython
-if not IS_PYPY:
+if IS_CPY:
from tmp_hpy import bench as bench_hpy
+pprint({key: sys.implementation.__dict__[key] for key in ("cache_tag", "version")})
+print(f"hostname: {socket.gethostname()}")
# get norm from Julia benchmark
-with open("tmp_result_julia.txt") as file:
- norm = float(file.read())
+
+path_julia_result = Path("tmp_result_julia.txt")
+assert path_julia_result.exists()
+norm = float(path_julia_result.read_text())
max_length_name = len("piconumpy (CPython C-API)") + 2
@@ -74,12 +86,12 @@ def create_tmp_file(name_module):
name = fmt_name.format("Julia")
print(f"{name}: 1 * norm = {norm:4.3g} s")
-n_sleds = 10
+n_sleds = 100
n_time = 200
g = locals()
-def timeit(name_func, name):
+def timeit(name_func, name, total_duration=2):
return timeit_verbose(
name_func + "(n_sleds, n_time)",
globals=g,
@@ -87,21 +99,28 @@ def timeit(name_func, name):
print_time=False,
norm=norm,
max_length_name=max_length_name,
+ total_duration=total_duration,
)
timeit("bench", name="PicoNumpy (CPython C-API)")
-if not IS_PYPY:
+if IS_CPY:
timeit("bench_hpy", name="PicoNumpy (HPy CPy ABI)")
timeit("bench_hpy_universal", name="PicoNumpy (HPy Universal)")
timeit("bench_pythran", name="Transonic-Pythran")
-timeit("bench_numpy", name="Numpy")
+try:
+ timeit("bench_numpy", name="Numpy", total_duration=8)
+except RuntimeError:
+ print("Skip bench_numpy because it's too slow")
timeit(
"bench_piconumpy_purepy", name="PicoNumpy (purepy)",
)
timeit(
"bench_piconumpy_purepy_array", name="PicoNumpy (purepy_array)",
)
-timeit("bench_cython", name="PicoNumpy (Cython)")
+try:
+ timeit("bench_cython", name="PicoNumpy (Cython)", total_duration=8)
+except RuntimeError:
+ print("Skip bench_cython because it's too slow")
"""
)
diff --git a/bench/microbench_low_level/Makefile b/bench/microbench_low_level/Makefile
new file mode 100644
index 0000000..5f874dd
--- /dev/null
+++ b/bench/microbench_low_level/Makefile
@@ -0,0 +1,48 @@
+
+IMPLEMENTATION=$(shell python -c 'import sys; print(sys.implementation.cache_tag)')
+
+.PHONY : clean _bench bench_sum_loop bench_sum_loop_index bench_cort bench_init_zeros bench_instantiate bench_board bench_element_wise produce_traces
+
+bench_sum_loop: NAME_BENCH=sum_loop
+bench_sum_loop: tmp/sum_loop_julia.txt _bench
+
+bench_sum_loop_index: NAME_BENCH=sum_loop_index
+bench_sum_loop_index: tmp/sum_loop_index_julia.txt _bench
+
+bench_cort: NAME_BENCH=cort
+bench_cort: tmp/cort_julia.txt _bench
+
+bench_init_zeros: NAME_BENCH=init_zeros
+bench_init_zeros: tmp/init_zeros_julia.txt _bench
+
+bench_board: NAME_BENCH=board
+bench_board: tmp/board_julia.txt _bench
+
+bench_instantiate: NAME_BENCH=instantiate
+bench_instantiate: tmp/instantiate_julia.txt _bench
+
+bench_element_wise: NAME_BENCH=element_wise
+bench_element_wise: tmp/element_wise_julia.txt _bench
+
+_bench:
+ @echo bench $(NAME_BENCH)
+ @python -c "from socket import gethostname as f; print('hostname:', f())"
+ @python -c "import sys; from pprint import pprint as p; p({key: sys.implementation.__dict__[key] for key in ('cache_tag', 'version')})"
+ @python bench.py list $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_list.txt
+ @python bench.py purepy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_purepy.txt
+ @python bench.py numpy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_numpy.txt
+ @python bench.py _piconumpy_hpy $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_hpy.txt
+ @python bench.py _piconumpy_cpython_capi $(NAME_BENCH) | tee tmp/$(NAME_BENCH)_$(IMPLEMENTATION)_cpy_api.txt
+
+tmp/%_julia.txt: julia/bench_%.jl
+ @mkdir -p tmp
+ @julia julia/bench_$*.jl > $@
+
+clean:
+ rm -rf tmp
+
+produce_traces: tmp/sum_loop_julia.txt
+ @mkdir -p tmp
+ PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_list.txt pypy bench.py list
+ PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_piconumpy_list.txt pypy bench.py purepy
+ PYPYLOG=jit-log-opt,jit-summary,jit-backend-counts:tmp/pypylog_piconumpy_hpy.txt pypy bench.py _piconumpy_hpy
diff --git a/bench/microbench_low_level/README.md b/bench/microbench_low_level/README.md
new file mode 100644
index 0000000..7965c76
--- /dev/null
+++ b/bench/microbench_low_level/README.md
@@ -0,0 +1,25 @@
+# Microbenchmarks of low-level Python code
+
+We measure the performance of functions containing low-level Python code.
+
+- `sum_loop` (command `make bench_sum_loop`): `for value in arr` and summation
+
+- `sum_loop_index` (command `make bench_sum_loop_index`):
+ `for index in range(5000)` and summation
+
+- `init_zeros` (command `make bench_init_zeros`): set all values to zero
+
+- `cort` (command `make bench_cort`): normalized cosine similarity measure
+ between derivatives
+
+- `board` (command `make bench_board`): a few indexing operations, simple float
+  computations with sin/cos, and the instantiation of a small array.
+
+- `instantiate` (command `make bench_instantiate`): dominated by the
+ instantiation/deletion of small arrays of 4 floats.
+
+- `element_wise` (command `make bench_element_wise`): dominated by the
+ instantiation/deletion of small arrays of 4 floats and calling element-wise
+ operations.
+
+The files `result_*.md` contain a few results.
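+
+Each implementation can also be timed directly with `bench.py` (this is what the `_bench`
+target of the Makefile does), for example `python bench.py purepy element_wise`, once the
+corresponding `tmp/*_julia.txt` reference has been produced by a `make bench_*` run.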
diff --git a/bench/microbench_low_level/bench.py b/bench/microbench_low_level/bench.py
new file mode 100644
index 0000000..50a7d48
--- /dev/null
+++ b/bench/microbench_low_level/bench.py
@@ -0,0 +1,186 @@
+import sys
+from time import perf_counter
+from pathlib import Path
+from random import random
+from math import sqrt, pi, sin, cos
+
+try:
+ method = sys.argv[1]
+except IndexError:
+ method = "purepy"
+
+try:
+ name_bench = sys.argv[2]
+except IndexError:
+ name_bench = "sum_loop"
+
+try:
+ size = sys.argv[3]
+except IndexError:
+ size = None
+
+if method == "_piconumpy_hpy":
+ from piconumpy.util_hpy import import_ext
+
+ ext = import_ext()
+ array = ext.array
+elif method == "list":
+ array = list
+ if name_bench == "element_wise":
+ sys.exit(0)
+
+elif method == "numpy":
+
+ try:
+ import numpy as np
+ except ImportError:
+ print(f"{method:30s}: ImportError numpy")
+ sys.exit(0)
+
+ array = np.array
+else:
+ d = {}
+ exec(f"from piconumpy.{method} import array", d)
+ array = d["array"]
+ if "piconumpy" not in method:
+ method = f"piconumpy.{method}"
+
+if "_piconumpy_" in method:
+ method = method.replace("_piconumpy_", "piconumpy.")
+
+if method.endswith("hpy"):
+ method += " (universal)"
+
+tmp_result_julia = Path(f"tmp/{name_bench}_julia.txt")
+if tmp_result_julia.exists():
+ with open(tmp_result_julia) as file:
+ norm = float(file.read())
+else:
+ raise RuntimeError(
+        f"{tmp_result_julia} does not exist. Run the corresponding `make bench_*` target first"
+ )
+
+
+def sum_loop(arr):
+ result = 0.0
+ for value in arr:
+ result += value
+ return result
+
+
+def sum_loop_index(arr):
+ result = 0.0
+ for index in range(5000):
+ result += arr[index]
+ return result
+
+
+def init_zeros(arr):
+ for index in range(len(arr)):
+ arr[index] = 0.0
+
+
+def _cort(s1, s2):
+ num = 0.0
+ sum_square_x = 0.0
+ sum_square_y = 0.0
+ for t in range(len(s1) - 1):
+ slope_1 = s1[t + 1] - s1[t]
+ slope_2 = s2[t + 1] - s2[t]
+ num += slope_1 * slope_2
+ sum_square_x += slope_1 * slope_1
+ sum_square_y += slope_2 * slope_2
+ return num / (sqrt(sum_square_x * sum_square_y))
+
+
+def cort(arr):
+ return _cort(arr, arr)
+
+
+def board(X_0):
+ x0 = X_0[0]
+ y0 = X_0[1]
+ u0 = X_0[2]
+ v0 = X_0[3]
+
+ g = 9.81
+ b = 0.5
+ a = 0.25
+ c = 0.5
+ p = (2 * pi) / 10.0
+ q = (2 * pi) / 4.0
+
+ H_x = -a + b * p * sin(p * x0) * cos(q * y0)
+ H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0)
+ H_y = b * q * cos(p * x0) * sin(q * y0)
+ H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0)
+ H_xy = -b * q * p * sin(p * x0) * sin(q * y0)
+
+ F = (g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2) / (
+ 1 + H_x ** 2 + H_y ** 2
+ )
+
+ dU = -F * H_x - c * u0
+ dV = -F * H_y - c * v0
+
+ return array([u0, v0, dU, dV])
+
+
+def instantiate(arr):
+ x = arr[0]
+ result = array([x, 3 * x, 6 * x, 9 * x])
+ result[0] = 2 * result[1]
+ return result
+
+
+def element_wise(arr):
+
+ dt = 0.1
+ x0 = arr
+
+ k1 = x0 * dt
+ k2 = (x0 + k1 / 2) * dt
+ k3 = (x0 + k2 / 2) * dt
+ k4 = (x0 + k3) * dt
+ # workaround for a pypy bug
+ # see https://foss.heptapod.net/pypy/pypy/-/issues/3509
+ # x_new = x0 + (k1 + 2 * k2 + 2 * k3 + k4) / 6
+ x_new = x0 + (k1 + k2 * 2 + k3 * 2 + k4) / 6
+ return x_new
+
+
+compute_from_arr = locals()[name_bench]
+
+if size is None:
+ if name_bench.startswith("sum_loop") or name_bench == "cort":
+ size = 10000
+ else:
+ size = 4
+
+print(f"{method:30s}:", end="", flush=True)
+
+# warm up for ~1 s
+data_as_list = [random() for _ in range(size)]
+arr = array(data_as_list)
+t_start = perf_counter()
+while perf_counter() - t_start < 1.0:
+ compute_from_arr(arr)
+
+
+def median(sequence):
+ tmp = sorted(sequence)
+ return tmp[len(tmp) // 2]
+
+
+# measure for ~4 s
+t0 = perf_counter()
+times = []
+while perf_counter() - t0 < 4.0:
+ data_as_list = [random() for _ in range(size)]
+ arr = array(data_as_list)
+ t_start = perf_counter()
+ compute_from_arr(arr)
+ times.append(perf_counter() - t_start)
+
+time = median(times)
+print(f" {time:.2e} s ({time / norm:5.1f} * Julia)")
diff --git a/bench/microbench_low_level/julia/bench_board.jl b/bench/microbench_low_level/julia/bench_board.jl
new file mode 100644
index 0000000..69d8b64
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_board.jl
@@ -0,0 +1,44 @@
+using Statistics
+
+function board(X_0::Array)
+
+ x0 = copy(X_0[1])
+ y0 = copy(X_0[2])
+ u0 = copy(X_0[3])
+ v0 = copy(X_0[4])
+
+ g = 9.81
+ a = 0.25
+ b = 0.5
+ c = 0.5
+ p = (2*π)/10.0
+ q = (2*π)/4.0
+
+ H_x = -a + b*p*sin(p*x0)*cos(q*y0)
+ H_xx = b*p^2 * cos(p*x0)*cos(q*y0)
+ H_y = b*q*cos(p*x0)*sin(q*y0)
+ H_yy = b*q^2 * cos(p*x0)*cos(q*y0)
+ H_xy = -b*q*p*sin(p*x0)*sin(q*y0)
+
+ F = (g + H_xx*u0^2 + 2*H_xy*u0*v0 + H_yy*v0^2)/(1 + H_x^2 + H_y^2)
+
+ dU = -F*H_x - c*u0
+ dV = -F*H_y - c*v0
+
+ return [u0, v0, dU, dV]
+
+end
+
+compute_from_arr = board
+
+size = 4
+nb_runs = 200
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/julia/bench_cort.jl b/bench/microbench_low_level/julia/bench_cort.jl
new file mode 100644
index 0000000..a816541
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_cort.jl
@@ -0,0 +1,35 @@
+using Statistics
+
+
+function cort(s1, s2)
+ num = 0.0
+ sum_square_x = 0.0
+ sum_square_y = 0.0
+ for t in 1:length(s1)-1
+ slope_1 = s1[t + 1] - s1[t]
+ slope_2 = s2[t + 1] - s2[t]
+ num += slope_1 * slope_2
+ sum_square_x += slope_1 * slope_1
+ sum_square_y += slope_2 * slope_2
+ end
+ return num / (sqrt(sum_square_x * sum_square_y))
+end
+
+function use_cort(arr)
+ return cort(arr, arr)
+end
+
+
+compute_from_arr = use_cort
+
+size = 10000
+nb_runs = 200
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/julia/bench_element_wise.jl b/bench/microbench_low_level/julia/bench_element_wise.jl
new file mode 100644
index 0000000..c91a16f
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_element_wise.jl
@@ -0,0 +1,30 @@
+using Statistics
+
+function element_wise(arr::Array)
+
+ dt = 0.1
+ x0 = arr
+
+ k1 = x0 * dt
+ k2 = (x0 + k1 / 2) * dt
+ k3 = (x0 + k2 / 2) * dt
+ k4 = (x0 + k3) * dt
+ x_new = x0 + (k1 + 2 * k2 + 2 * k3 + k4) / 6
+
+ return x_new
+
+end
+
+compute_from_arr = element_wise
+
+size = 4
+nb_runs = 2000
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/julia/bench_init_zeros.jl b/bench/microbench_low_level/julia/bench_init_zeros.jl
new file mode 100644
index 0000000..4ac2656
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_init_zeros.jl
@@ -0,0 +1,21 @@
+using Statistics
+
+function init_zeros(arr)
+ for i in eachindex(arr)
+ arr[i] = 0.0
+ end
+end
+
+compute_from_arr = init_zeros
+
+size = 4
+nb_runs = 200
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/julia/bench_instantiate.jl b/bench/microbench_low_level/julia/bench_instantiate.jl
new file mode 100644
index 0000000..5116e07
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_instantiate.jl
@@ -0,0 +1,22 @@
+using Statistics
+
+function instantiate(arr::Array)
+ x = arr[1]
+ result = [x, 3*x, 6*x, 9*x]
+ result[1] = 2 * result[2]
+ return result
+end
+
+compute_from_arr = instantiate
+
+size = 4
+nb_runs = 200
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/julia/bench_sum_loop.jl b/bench/microbench_low_level/julia/bench_sum_loop.jl
new file mode 100644
index 0000000..5c38b52
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_sum_loop.jl
@@ -0,0 +1,23 @@
+using Statistics
+
+function sum_loop(arr)
+ result = 0.
+ for i in eachindex(arr)
+ result += arr[i]
+ end
+ return result
+end
+
+compute_from_arr = sum_loop
+
+size = 10000
+nb_runs = 200
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/julia/bench_sum_loop_index.jl b/bench/microbench_low_level/julia/bench_sum_loop_index.jl
new file mode 100644
index 0000000..b4c682c
--- /dev/null
+++ b/bench/microbench_low_level/julia/bench_sum_loop_index.jl
@@ -0,0 +1,23 @@
+using Statistics
+
+function sum_loop_index(arr)
+ result = 0.
+ for i = 1:5000
+ result += arr[i]
+ end
+ return result
+end
+
+compute_from_arr = sum_loop_index
+
+size = 10000
+nb_runs = 200
+
+times = zeros(nb_runs)
+
+for irun in 1:nb_runs
+ arr = rand(size)
+ times[irun] = @elapsed compute_from_arr(arr)
+end
+
+println(median(times))
diff --git a/bench/microbench_low_level/result_board.md b/bench/microbench_low_level/result_board.md
new file mode 100644
index 0000000..30b407b
--- /dev/null
+++ b/bench/microbench_low_level/result_board.md
@@ -0,0 +1,77 @@
+# Microbenchmark board
+
+We measure the performance for this function:
+
+```python
+def board(X_0):
+ x0 = X_0[0]
+ y0 = X_0[1]
+ u0 = X_0[2]
+ v0 = X_0[3]
+
+ g = 9.81
+ b = 0.5
+ a = 0.25
+ c = 0.5
+ p = (2 * pi) / 10.0
+ q = (2 * pi) / 4.0
+
+ H_x = -a + b * p * sin(p * x0) * cos(q * y0)
+ H_xx = b * p ** 2 * cos(p * x0) * cos(q * y0)
+ H_y = b * q * cos(p * x0) * sin(q * y0)
+ H_yy = b * q ** 2 * cos(p * x0) * cos(q * y0)
+ H_xy = -b * q * p * sin(p * x0) * sin(q * y0)
+
+ F = (g + H_xx * u0 ** 2 + 2 * H_xy * u0 * v0 + H_yy * v0 ** 2) / (
+ 1 + H_x ** 2 + H_y ** 2
+ )
+
+ dU = -F * H_x - c * u0
+ dV = -F * H_y - c * v0
+
+ return array([u0, v0, dU, dV])
+```
+
+One can run the benchmarks with `make bench_board`.
+
+With PyPy3.7, I get:
+
+```
+bench board
+hostname: voyage
+{'cache_tag': 'pypy37',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)}
+list : 3.21e-07 s ( 0.9 * Julia)
+piconumpy.purepy : 1.37e-05 s ( 36.9 * Julia)
+numpy : 1.18e-04 s (316.6 * Julia)
+piconumpy.hpy : 1.26e-05 s ( 33.8 * Julia)
+piconumpy.cpython_capi : 5.52e-05 s (148.6 * Julia)
+```
+
+With CPython:
+
+```
+bench board
+hostname: voyage
+{'cache_tag': 'cpython-39',
+ 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)}
+list : 5.16e-06 s ( 13.9 * Julia)
+piconumpy.purepy : 8.04e-06 s ( 21.6 * Julia)
+numpy : 1.01e-05 s ( 27.1 * Julia)
+piconumpy.hpy : 5.90e-06 s ( 15.9 * Julia)
+piconumpy.cpython_capi : 5.56e-06 s ( 15.0 * Julia)
+```
+
+With Python 3.8.5 (GraalVM CE Native 21.3.0):
+
+```
+bench board
+hostname: voyage
+{'cache_tag': 'graalpython-38',
+ 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)}
+list : 1.15e-05 s ( 30.9 * Julia)
+piconumpy.purepy : 1.74e-05 s ( 46.8 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 4.91e-05 s (132.2 * Julia)
+piconumpy.cpython_capi : 6.19e-05 s (166.7 * Julia)
+```
diff --git a/bench/microbench_low_level/result_cort.md b/bench/microbench_low_level/result_cort.md
new file mode 100644
index 0000000..b5578bf
--- /dev/null
+++ b/bench/microbench_low_level/result_cort.md
@@ -0,0 +1,64 @@
+# Microbenchmark cort
+
+We measure the performance for this function:
+
+```python
+def cort(arr):
+ return _cort(arr, arr)
+
+def _cort(s1, s2):
+ num = 0.0
+ sum_square_x = 0.0
+ sum_square_y = 0.0
+ for t in range(len(s1) - 1):
+ slope_1 = s1[t + 1] - s1[t]
+ slope_2 = s2[t + 1] - s2[t]
+ num += slope_1 * slope_2
+ sum_square_x += slope_1 * slope_1
+ sum_square_y += slope_2 * slope_2
+ return num / (sqrt(sum_square_x * sum_square_y))
+```
+
+One can run the benchmarks with `make bench_cort`.
+
+With PyPy3.7, I get:
+
+```
+bench cort
+hostname: voyage
+{'cache_tag': 'pypy37',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)}
+list : 4.29e-05 s ( 1.8 * Julia)
+piconumpy.purepy : 4.12e-05 s ( 1.7 * Julia)
+numpy : 4.77e-02 s (1975.5 * Julia)
+piconumpy.hpy : 1.46e-03 s ( 60.5 * Julia)
+piconumpy.cpython_capi : 6.96e-03 s (288.5 * Julia)
+```
+
+With CPython:
+
+```
+bench cort
+hostname: voyage
+{'cache_tag': 'cpython-39',
+ 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)}
+list : 4.42e-03 s (183.4 * Julia)
+piconumpy.purepy : 1.04e-02 s (430.0 * Julia)
+numpy : 9.76e-03 s (404.4 * Julia)
+piconumpy.hpy : 5.66e-03 s (234.7 * Julia)
+piconumpy.cpython_capi : 4.77e-03 s (197.7 * Julia)
+```
+
+With Python 3.8.5 (GraalVM CE Native 21.3.0):
+
+```
+bench cort
+hostname: voyage
+{'cache_tag': 'graalpython-38',
+ 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)}
+list : 2.44e-05 s ( 1.0 * Julia)
+piconumpy.purepy : 3.13e-05 s ( 1.3 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 1.69e-04 s ( 7.0 * Julia)
+piconumpy.cpython_capi : 3.55e-04 s ( 14.7 * Julia)
+```
diff --git a/bench/microbench_low_level/result_init_zeros.md b/bench/microbench_low_level/result_init_zeros.md
new file mode 100644
index 0000000..b88e4bd
--- /dev/null
+++ b/bench/microbench_low_level/result_init_zeros.md
@@ -0,0 +1,53 @@
+# Microbenchmark init_zeros
+
+We measure the performance for this function:
+
+```python
+def init_zeros(arr):
+ for index in range(len(arr)):
+ arr[index] = 0.0
+```
+
+One can run the benchmarks with `make bench_init_zeros`.
+
+With PyPy3.7, I get:
+
+```
+bench init_zeros
+hostname: voyage
+{'cache_tag': 'pypy37',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)}
+list : 2.63e-05 s ( 5.4 * Julia)
+piconumpy.purepy : 2.99e-05 s ( 6.1 * Julia)
+numpy : 1.17e-02 s (2403.5 * Julia)
+piconumpy.hpy : 4.58e-04 s ( 94.1 * Julia)
+piconumpy.cpython_capi : 8.46e-04 s (173.6 * Julia)
+```
+
+With CPython:
+
+```
+bench init_zeros
+hostname: voyage
+{'cache_tag': 'cpython-39',
+ 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)}
+list : 5.34e-04 s (109.6 * Julia)
+piconumpy.purepy : 2.03e-03 s (417.4 * Julia)
+numpy : 1.17e-03 s (239.3 * Julia)
+piconumpy.hpy : 7.51e-04 s (154.1 * Julia)
+piconumpy.cpython_capi : 5.44e-04 s (111.5 * Julia)
+```
+
+With Python 3.8.5 (GraalVM CE Native 21.3.0):
+
+```
+bench init_zeros
+hostname: voyage
+{'cache_tag': 'graalpython-38',
+ 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)}
+list : 1.37e-05 s ( 2.8 * Julia)
+piconumpy.purepy : 1.93e-05 s ( 4.0 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 4.68e-05 s ( 9.6 * Julia)
+piconumpy.cpython_capi : 1.74e-04 s ( 35.8 * Julia)
+```
diff --git a/bench/microbench_low_level/result_instantiate.md b/bench/microbench_low_level/result_instantiate.md
new file mode 100644
index 0000000..883cea1
--- /dev/null
+++ b/bench/microbench_low_level/result_instantiate.md
@@ -0,0 +1,55 @@
+# Microbenchmark instantiate
+
+We measure the performance for this function:
+
+```python
+def instantiate(arr):
+ x = arr[0]
+ result = array([x, 3 * x, 6 * x, 9 * x])
+ result[0] = 2 * result[1]
+ return result
+```
+
+One can run the benchmarks with `make bench_instantiate`.
+
+With PyPy3.7, I get:
+
+```
+bench instantiate
+hostname: meige8pcpa79
+{'cache_tag': 'pypy37',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)}
+list : 1.13e-07 s ( 0.9 * Julia)
+piconumpy.purepy : 8.50e-08 s ( 0.7 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 1.69e-06 s ( 13.1 * Julia)
+piconumpy.cpython_capi : 1.53e-05 s (118.3 * Julia)
+```
+
+With CPython:
+
+```
+bench instantiate
+hostname: meige8pcpa79
+{'cache_tag': 'cpython-39',
+ 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)}
+list : 1.19e-06 s ( 9.2 * Julia)
+piconumpy.purepy : 2.59e-06 s ( 20.0 * Julia)
+numpy : 3.63e-06 s ( 28.1 * Julia)
+piconumpy.hpy : 1.84e-06 s ( 14.3 * Julia)
+piconumpy.cpython_capi : 1.35e-06 s ( 10.5 * Julia)
+```
+
+With Python 3.8.5 (GraalVM CE Native 21.3.0):
+
+```
+bench instantiate
+hostname: meige8pcpa79
+{'cache_tag': 'graalpython-38',
+ 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)}
+list : 4.16e-06 s ( 32.3 * Julia)
+piconumpy.purepy : 4.15e-06 s ( 32.2 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 7.32e-06 s ( 56.8 * Julia)
+piconumpy.cpython_capi : 9.68e-06 s ( 75.0 * Julia)
+```
diff --git a/bench/microbench_low_level/result_sum_loop.md b/bench/microbench_low_level/result_sum_loop.md
new file mode 100644
index 0000000..062840b
--- /dev/null
+++ b/bench/microbench_low_level/result_sum_loop.md
@@ -0,0 +1,201 @@
+# Microbenchmark sum_loop
+
+We measure the performance for this function:
+
+```python
+def sum_loop(arr):
+ result = 0.0
+ for value in arr:
+ result += value
+ return result
+```
+
+One can run the benchmarks with `make bench_sum_loop`.
+
+With PyPy3.7, I get:
+
+```
+bench sum_loop
+hostname: voyage
+{'cache_tag': 'pypy37',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)}
+list : 2.35e-05 s ( 1.8 * Julia)
+piconumpy.purepy : 2.60e-05 s ( 2.0 * Julia)
+numpy : 8.97e-03 s (677.0 * Julia)
+piconumpy.hpy : 3.73e-04 s ( 28.2 * Julia)
+piconumpy.cpython_capi : 1.75e-03 s (132.4 * Julia)
+```
+
+With CPython:
+
+```
+bench sum_loop
+hostname: voyage
+{'cache_tag': 'cpython-39',
+ 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)}
+list : 3.65e-04 s ( 27.5 * Julia)
+piconumpy.purepy : 2.17e-03 s (164.1 * Julia)
+numpy : 1.09e-03 s ( 82.2 * Julia)
+piconumpy.hpy : 7.39e-04 s ( 55.8 * Julia)
+piconumpy.cpython_capi : 5.07e-04 s ( 38.3 * Julia)
+```
+
+With Python 3.8.5 (GraalVM CE Native 21.3.0):
+
+```
+bench sum_loop
+hostname: voyage
+{'cache_tag': 'graalpython-38',
+ 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)}
+list : 1.92e-05 s ( 1.4 * Julia)
+piconumpy.purepy : 3.61e-05 s ( 2.7 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 5.03e-04 s ( 38.0 * Julia)
+piconumpy.cpython_capi : 2.90e-03 s (219.1 * Julia)
+```
+
+## Summary
+
+- PyPy is fast with list (1.3 * Julia, the same order of magnitude as Julia) and as fast
+  for a piconumpy array based on a list (`piconumpy.purepy`, a zero-cost abstraction!)
+
+- Numpy and _piconumpy_cpython_capi are both much slower with PyPy than with
+  CPython. We can guess that the Numpy port to HPy would fix that.
+
+- piconumpy_hpy is a bit faster with PyPy (19 * Julia) than with CPython (40 * Julia);
+  however, we see that PyPy does not strongly accelerate piconumpy_hpy (19 * Julia,
+  14 * piconumpy_list).
+
+## PyPy JIT traces for `sum_loop`
+
+### List
+
+```
++557: label(p0, p1, p6, p9, f35, f30, p15, p22, p26, i32, i27, p29, descr=TargetToken(140447503809120))
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER')
++606: i44 = uint_ge(i32, i27)
+guard_false(i44, descr=) [p0, p6, p9, p15, p1, i32, i27, i44, p26, f30, f35]
++615: f45 = getarrayitem_gc_f(p29, i32, descr=)
++622: i47 = int_add(i32, 1)
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD')
++626: f48 = float_add(f35, f45)
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE')
++630: setfield_gc(p15, i47, descr=)
++634: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, f45, f48, None, None]
++634: i51 = getfield_raw_i(140447672379264, descr=)
++647: i53 = int_sub(i51, 1)
++651: setfield_raw(140447672379264, i53, descr=)
++654: i56 = int_lt(i53, 0)
++658: guard_false(i56, descr=) [p0, p6, p9, p15, p1, i53, f45, f48, None, None]
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER')
++664: i57 = arraylen_gc(p29, descr=)
++664: jump(p0, p1, p6, p9, f48, f45, p15, p22, p26, i47, i27, p29, descr=TargetToken(140447503809120))
+```
+
+### piconumpy purepy (based on list)
+
+```
++705: label(p0, p1, p6, p9, f53, f46, p15, p22, i49, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776))
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER')
++760: guard_not_invalidated(descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53]
++760: p62 = force_token()
++760: enter_portal_frame(21, 28364)
+debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#0 LOAD_FAST')
+debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#2 LOAD_ATTR')
+debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#4 LOAD_FAST')
+debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#6 BINARY_SUBSCR')
++760: i65 = uint_ge(i49, i43)
++763: guard_false(i65, descr=) [p0, p6, p9, p15, p1, p22, i49, f46, f53]
++769: f66 = getarrayitem_gc_f(p45, i49, descr=)
+debug_merge_point(1, 1, '__getitem__;/home/pierre/Dev/piconumpy/piconumpy/purepy.py:27-28~#8 RETURN_VALUE')
++776: leave_portal_frame(21)
++776: i69 = int_add(i49, 1)
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD')
++780: f70 = float_add(f53, f66)
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE')
++784: i72 = getfield_raw_i(139748871243648, descr=)
++797: i74 = int_sub(i72, 3)
++801: setfield_raw(139748871243648, i74, descr=)
++804: setfield_gc(p15, i69, descr=)
++808: i77 = int_lt(i74, 0)
++812: guard_false(i77, descr=) [p0, p6, p9, p15, p1, i74, f66, f70, None, None, None]
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER')
++818: i78 = arraylen_gc(p45, descr=)
++818: jump(p0, p1, p6, p9, f70, f66, p15, p22, i69, p29, p38, p42, i43, p45, descr=TargetToken(139748702723776))
+```
+
+### piconumpy hpy
+
+```
++1339: label(p0, p1, p6, p9, f73, p63, p15, i68, p62, descr=TargetToken(139865876151520))
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER')
++1352: p82 = getfield_gc_r(p15, descr=)
++1356: guard_nonnull_class(p82, 139866025815200, descr=) [p0, p6, p9, p63, p15, p1, p82, f73]
++1376: p84 = getfield_gc_r(p82, descr=)
++1387: guard_value(p84, ConstPtr(ptr85), descr=) [p0, p6, p9, p63, p15, p1, p82, f73]
++1396: guard_not_invalidated(descr=) [p0, p6, p9, p63, p15, p1, p82, f73]
++1403: p87 = getfield_gc_r(ConstPtr(ptr86), descr=)
++1414: guard_value(p87, ConstPtr(ptr88), descr=) [p0, p6, p9, p63, p15, p1, p82, f73]
++1423: i90 = getfield_gc_i(ConstPtr(ptr89), descr=)
++1427: i92 = int_lt(i68, 0)
++1431: guard_false(i92, descr=) [p0, p6, p9, p63, p15, p1, p82, f73]
++1444: i94 = getfield_gc_i(ConstPtr(ptr93), descr=)
++1448: i95 = int_is_zero(i94)
++1451: guard_false(i95, descr=) [p0, p6, p9, p63, p15, p1, p82, f73]
++1457: i97 = int_sub(i94, 1)
++1461: p99 = getfield_gc_r(ConstPtr(ptr98), descr=)
++1465: i100 = getarrayitem_gc_i(p99, i97, descr=)
++1470: i101 = arraylen_gc(p99, descr=)
++1474: i103 = int_rshift(i101, 1)
++1477: i105 = int_sub(i103, 5)
++1481: i106 = int_lt(i97, i105)
++1484: cond_call(i106, ConstClass(_ll_list_resize_hint_really_look_inside_iff__listPtr_Signed_Bool), ConstPtr(ptr108), i97, 0, descr=)
++1490: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, i97, f73]
++1490: setfield_gc(ConstPtr(ptr110), i97, descr=)
++1494: i112 = int_lt(i100, 0)
++1498: guard_false(i112, descr=) [p0, p6, p9, p63, p15, p1, i68, i90, i100, p82, f73]
++1522: setarrayitem_gc(p62, i100, p82, descr=)
++1527: p113 = force_token()
++1548: setfield_gc(p0, p113, descr=)
++1552: i115 = call_may_force_i(i90, 139866044538144, i100, i68, descr=)
++1663: guard_not_forced(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73]
++1674: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i100, i115, i68, p82, f73]
++1688: call_n(ConstClass(close), i100, descr=)
++1754: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73]
++1768: i117 = int_is_true(i115)
++1771: guard_true(i117, descr=) [p0, p6, p9, p63, p15, p1, i115, i68, p82, f73]
++1784: p119 = getfield_gc_r(ConstPtr(ptr118), descr=)
++1788: p120 = getarrayitem_gc_r(p119, i115, descr=)
++1793: call_n(ConstClass(close), i115, descr=)
++1866: guard_no_exception(descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73]
++1880: guard_nonnull_class(p120, ConstClass(W_FloatObject), descr=) [p0, p6, p9, p63, p15, p1, p120, i68, p82, f73]
++1907: i123 = getfield_gc_i(p15, descr=)
++1918: i125 = int_add(i123, 1)
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#12 STORE_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#14 LOAD_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#16 LOAD_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#18 INPLACE_ADD')
++1923: setfield_gc(p15, i125, descr=)
++1927: f126 = getfield_gc_f(p120, descr=)
++1933: f127 = float_add(f73, f126)
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#20 STORE_FAST')
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-26~#22 JUMP_ABSOLUTE')
++1947: guard_not_invalidated(descr=) [p0, p6, p9, p120, p15, p1, f127, None, None, None]
++1947: i129 = getfield_raw_i(139866044675968, descr=)
++1960: i131 = int_sub(i129, 3)
++1964: setfield_raw(139866044675968, i131, descr=)
++1967: i134 = int_lt(i131, 0)
++1971: guard_false(i134, descr=) [p0, p6, p9, p120, p15, p1, i131, f127, None, None, None]
+debug_merge_point(0, 0, 'sum_loop;bench.py:23-25~#10 FOR_ITER')
++1977: i135 = arraylen_gc(p119, descr=)
++1977: jump(p0, p1, p6, p9, f127, p120, p15, i125, p119, descr=TargetToken(139865876151520))
+```
diff --git a/bench/microbench_low_level/result_sum_loop_index.md b/bench/microbench_low_level/result_sum_loop_index.md
new file mode 100644
index 0000000..fd63301
--- /dev/null
+++ b/bench/microbench_low_level/result_sum_loop_index.md
@@ -0,0 +1,55 @@
+# Microbenchmark sum_loop_index
+
+We measure the performance for this function:
+
+```python
+def sum_loop_index(arr):
+ result = 0.0
+ for index in range(5000):
+ result += arr[index]
+ return result
+```
+
+One can run the benchmarks with `make bench_sum_loop_index`.
+
+With PyPy3.7, I get:
+
+```
+bench sum_loop_index
+hostname: voyage
+{'cache_tag': 'pypy37',
+ 'version': sys.pypy_version_info(major=7, minor=3, micro=7, releaselevel='final', serial=0)}
+list : 1.19e-05 s ( 2.0 * Julia)
+piconumpy.purepy : 1.64e-05 s ( 2.8 * Julia)
+numpy : 4.18e-03 s (711.4 * Julia)
+piconumpy.hpy : 1.73e-04 s ( 29.4 * Julia)
+piconumpy.cpython_capi : 8.44e-04 s (143.8 * Julia)
+```
+
+With CPython:
+
+```
+bench sum_loop_index
+hostname: voyage
+{'cache_tag': 'cpython-39',
+ 'version': sys.version_info(major=3, minor=9, micro=7, releaselevel='final', serial=0)}
+list : 3.91e-04 s ( 66.5 * Julia)
+piconumpy.purepy : 1.11e-03 s (188.3 * Julia)
+numpy : 8.93e-04 s (152.1 * Julia)
+piconumpy.hpy : 5.42e-04 s ( 92.3 * Julia)
+piconumpy.cpython_capi : 4.17e-04 s ( 71.0 * Julia)
+```
+
+With Python 3.8.5 (GraalVM CE Native 21.3.0):
+
+```
+bench sum_loop_index
+hostname: voyage
+{'cache_tag': 'graalpython-38',
+ 'version': sys.version_info(major=3, minor=8, micro=5, releaselevel='alpha', serial=0)}
+list : 1.36e-05 s ( 2.3 * Julia)
+piconumpy.purepy : 1.81e-05 s ( 3.1 * Julia)
+numpy : ImportError numpy
+piconumpy.hpy : 3.68e-05 s ( 6.3 * Julia)
+piconumpy.cpython_capi : 1.08e-04 s ( 18.5 * Julia)
+```
diff --git a/bench/profile_piconumpy.py b/bench/profile_piconumpy.py
index b7de388..3bde5ae 100644
--- a/bench/profile_piconumpy.py
+++ b/bench/profile_piconumpy.py
@@ -7,12 +7,14 @@
import tmp_purepy
import tmp_purepy_array
import tmp_cython
+import tmp_hpy_universal
methods = {
"cpython-c-api": bench_array1d,
"purepy": tmp_purepy,
"purepy_array": tmp_purepy_array,
"cython": tmp_cython,
+ "universal": tmp_hpy_universal,
}
module = methods.get(sys.argv[-1], bench_array1d)
diff --git a/piconumpy/bench.py b/piconumpy/bench.py
index a704e5f..f5d4d8d 100644
--- a/piconumpy/bench.py
+++ b/piconumpy/bench.py
@@ -11,6 +11,13 @@ def timeit_verbose(
print_time=False,
max_length_name=33,
):
+ if name is None:
+ name = stmt.split("(")[0]
+
+ fmt_name = f"{{:{max_length_name}s}}"
+ name = fmt_name.format(name)
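+    # print (and flush) the name before timing so that one can see which
+    # benchmark is currently running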
+ print(f"{name}:", end="", flush=True)
+
result = timeit(
stmt, setup=setup, total_duration=total_duration, globals=globals
)
@@ -20,18 +27,12 @@ def timeit_verbose(
else:
norm_given = True
- if name is None:
- name = stmt.split("(")[0]
-
- fmt_name = f"{{:{max_length_name}s}}"
- name = fmt_name.format(name)
-
if print_time:
raw_time = f" = {result:7.3g} s"
else:
raw_time = ""
- print(f"{name}: {result/norm:5.3g} * norm{raw_time}")
+ print(f" {result/norm:5.3g} * norm{raw_time}")
if not norm_given and not print_time:
print(f"norm = {norm:5.3g} s")
diff --git a/piconumpy/purepy.py b/piconumpy/purepy.py
index a84ad31..bfa4b03 100644
--- a/piconumpy/purepy.py
+++ b/piconumpy/purepy.py
@@ -2,7 +2,7 @@ class array:
__slots__ = ["data", "size"]
def __init__(self, data):
- self.data = list(float(number) for number in data)
+ self.data = list(data)
self.size = len(self.data)
def __add__(self, other):
@@ -30,9 +30,10 @@ def __getitem__(self, index):
def __setitem__(self, index, value):
self.data[index] = value
+
def empty(size):
- return array([0]*size)
+ return array([0] * size)
-def zeros(size):
- return array([0]*size)
+def zeros(size):
+ return array([0] * size)
diff --git a/piconumpy/purepy_array.py b/piconumpy/purepy_array.py
index ba801a2..7306cff 100644
--- a/piconumpy/purepy_array.py
+++ b/piconumpy/purepy_array.py
@@ -23,8 +23,10 @@ def __mul__(self, other):
def __truediv__(self, other):
return self.__class__(number / other for number in self)
+
def empty(size):
- return array([0]*size)
+ return array([0] * size)
+
def zeros(size):
- return array([0]*size)
+ return array([0] * size)
diff --git a/piconumpy/test_cpython_capi.py b/piconumpy/test_cpython_capi.py
index a1638dc..cedbed5 100644
--- a/piconumpy/test_cpython_capi.py
+++ b/piconumpy/test_cpython_capi.py
@@ -6,6 +6,7 @@
class Tests:
piconumpy = _piconumpy_cpython_capi
+
def _array(self, *args):
return self.piconumpy.array(*args)
diff --git a/piconumpy/test_cython.py b/piconumpy/test_cython.py
index 44cf1c5..438adc7 100644
--- a/piconumpy/test_cython.py
+++ b/piconumpy/test_cython.py
@@ -1,4 +1,5 @@
from .test_cpython_capi import Tests as _Tests
+
class Tests(_Tests):
from . import _piconumpy_cython as piconumpy
diff --git a/piconumpy/test_hpy_universal.py b/piconumpy/test_hpy_universal.py
index 358f037..2a470ca 100644
--- a/piconumpy/test_hpy_universal.py
+++ b/piconumpy/test_hpy_universal.py
@@ -1,16 +1,31 @@
+import sys
+
import pytest
+from .util_hpy import import_ext
from .test_cpython_capi import Tests as _Tests
try:
- from . import _piconumpy_hpy
+ piconumpy_universal = import_ext()
except ImportError:
- _piconumpy_hpy = False
+ piconumpy_universal = False
@pytest.mark.skipif(
- not _piconumpy_hpy, reason="ImportError piconumpy HPy Universal"
+ not piconumpy_universal, reason="ImportError piconumpy HPy Universal"
)
class TestsCPyABI(_Tests):
- piconumpy = _piconumpy_hpy
+ piconumpy = piconumpy_universal
+
+ def test_multiply(self):
+ if sys.implementation.name == "pypy":
+ pytest.xfail("Expected failure with PyPy (but should work)")
+
+ super().test_multiply()
+
+ def test_add(self):
+ if sys.implementation.name == "pypy":
+ pytest.xfail("Expected failure with PyPy (but should work)")
+
+ super().test_add()
diff --git a/piconumpy/test_purepy.py b/piconumpy/test_purepy.py
index 0793611..e7320e0 100644
--- a/piconumpy/test_purepy.py
+++ b/piconumpy/test_purepy.py
@@ -1,4 +1,5 @@
from .test_cpython_capi import Tests as _Tests
+
class Tests(_Tests):
from . import purepy as piconumpy
diff --git a/piconumpy/test_purepy_array.py b/piconumpy/test_purepy_array.py
index b41a8b7..4c3da8c 100644
--- a/piconumpy/test_purepy_array.py
+++ b/piconumpy/test_purepy_array.py
@@ -1,4 +1,5 @@
from .test_cpython_capi import Tests as _Tests
+
class Tests(_Tests):
from . import purepy_array as piconumpy
diff --git a/piconumpy/util_hpy.py b/piconumpy/util_hpy.py
new file mode 100644
index 0000000..1fbc47c
--- /dev/null
+++ b/piconumpy/util_hpy.py
@@ -0,0 +1,21 @@
+from importlib.util import spec_from_file_location
+from pathlib import Path
+
+from hpy.universal import load
+
+
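+# Load the HPy universal extension (_piconumpy_hpy.hpy0.so) with
+# hpy.universal.load instead of going through the standard import machinery.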
+def import_from_path(path):
+ name_ext = "_piconumpy_hpy"
+ ext_filepath = str(path)
+ spec = spec_from_file_location(name_ext, ext_filepath)
+ m = load(name_ext, ext_filepath, spec)
+ m.__file__ = ext_filepath
+ m.__loader__ = __loader__
+ m.__name__ = __name__
+ m.__package__ = __package__
+ return m
+
+
+def import_ext():
+ path = Path(__file__).parent / "_piconumpy_hpy.hpy0.so"
+ return import_from_path(path)
diff --git a/pyproject.toml b/pyproject.toml
index 3234fad..ff6b793 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,10 +5,12 @@ description = "An experiment about Numpy and pyhandle/hpy."
authors = [
{name = "Pierre Augier", email = "pierre.augier@univ-grenoble-alpes.fr"},
]
-license = {text = "BSD 3-Clause"}
+license = "BSD-3-Clause"
+license-files = ["LICENSE"]
readme = "README.md"
keywords = ["numpy", "hpy", "PyPy"]
requires-python = ">=3.8"
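+# hpy from PyPI is only needed on CPython; PyPy vendors its own hpy.universal
+# (see "Notes on PyPy" in the README)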
+dependencies = ["hpy>=0.9.0; implementation_name == 'cpython'"]
[project.urls]
homepage = "https://github.com/paugier/piconumpy"
@@ -16,15 +18,20 @@ repository = "https://github.com/paugier/piconumpy"
documentation = "https://github.com/paugier/piconumpy"
[project.optional-dependencies]
-dev = ['transonic', 'numpy', 'pytest', 'pythran']
-full = ['black']
+test = ["pytest", "numpy"]
+# pythran 0.18.0 needed but not yet on PyPI
+# (see https://github.com/serge-sans-paille/pythran/pull/2310#issuecomment-2871805768)
+bench = ['transonic', 'numpy', 'pythran@git+https://github.com/serge-sans-paille/pythran.git@0.18.0']
+profile = ["gprof2dot"]
+format = ['black']
+full = ["piconumpy[test,bench,profile,format]"]
[build-system]
requires = [
- "setuptools >= 35.0.2",
+ "setuptools>=35.0.2",
"wheel",
"cython",
- "hpy >= 0.9.0"
+ "hpy>=0.9.0; implementation_name == 'cpython'"
]
[tool.black]