Skip to content

Commit 80119b3

Browse files
committed
feat/perf: add performant async implementation
1 parent be0e854 commit 80119b3

File tree

4 files changed

+96
-66
lines changed

4 files changed

+96
-66
lines changed

py-gxhash/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,9 @@ name = "gxhash"
1313
crate-type = ["cdylib"]
1414

1515
[dependencies]
16-
pyo3 = "0.22.0"
16+
pyo3 = "0.24.0"
1717
gxhash = { path = "..", features = ["hybrid"], default-features = false }
18+
pyo3-async-runtimes = { version = "0.24.0", features = ["tokio-runtime"] }
19+
tokio = "1.44.1"
20+
memmap2 = "0.9.5"
21+
libc = "0.2.171"

py-gxhash/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# py-gxhash
22

33
```bash
4-
uv venv --seed
5-
uv run maturin develop
4+
uv sync --reinstall
65
```

py-gxhash/gxhash.pyi

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
def gxhash32(input_bytes: bytes, seed: int) -> int:
1+
from typing import BinaryIO
2+
3+
def gxhash32(file: BinaryIO, seed: int) -> int:
24
"""
35
Summary
46
-------
@@ -7,28 +9,29 @@ def gxhash32(input_bytes: bytes, seed: int) -> int:
79
810
Parameters
911
----------
10-
input_bytes (bytes): input bytes to hash
12+
file (BinaryIO)
13+
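file-like object backed by a real OS file descriptor (the descriptor is memory-mapped, so in-memory streams are not supported)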
1114
12-
seed (int): seed for the hash function
15+
seed (int)
16+
seed for the hash function
1317
1418
1519
Returns
1620
-------
17-
hash (int): u32 hash of the input bytes
21+
hash (int)
22+
u32 hash of the input bytes
1823
1924
2025
Example
2126
-------
2227
```python
23-
import gxhash
24-
25-
input_bytes = bytes([42] * 1000)
28+
file = BytesIO(bytes([42] * 1000))
2629
seed = 1234
27-
print(f"Hash is {gxhash.gxhash32(input_bytes, seed)}!")
30+
print(f"Hash is {gxhash.gxhash32(file, seed)}!")
2831
```
2932
"""
3033

31-
def gxhash32_nogil(input_bytes: bytes, seed: int) -> int:
34+
async def gxhash32_async(file: BinaryIO, seed: int) -> int:
3235
"""
3336
Summary
3437
-------
@@ -37,28 +40,29 @@ def gxhash32_nogil(input_bytes: bytes, seed: int) -> int:
3740
3841
Parameters
3942
----------
40-
input_bytes (bytes): input bytes to hash
43+
file (BinaryIO)
44+
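file-like object backed by a real OS file descriptor (the descriptor is memory-mapped, so in-memory streams are not supported)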
4145
42-
seed (int): seed for the hash function
46+
seed (int)
47+
seed for the hash function
4348
4449
4550
Returns
4651
-------
47-
hash (int): u32 hash of the input bytes
52+
hash (Awaitable[int])
53+
u32 hash of the input bytes
4854
4955
5056
Example
5157
-------
5258
```python
53-
import gxhash
54-
55-
input_bytes = bytes([42] * 1000)
59+
file = BytesIO(bytes([42] * 1000))
5660
seed = 1234
57-
print(f"Hash is {gxhash.gxhash32_nogil(input_bytes, seed)}!")
61+
print(f"Hash is {await gxhash.gxhash32_async(file, seed)}!")
5862
```
5963
"""
6064

61-
def gxhash64(input_bytes: bytes, seed: int) -> int:
65+
def gxhash64(file: BinaryIO, seed: int) -> int:
6266
"""
6367
Summary
6468
-------
@@ -67,28 +71,29 @@ def gxhash64(input_bytes: bytes, seed: int) -> int:
6771
6872
Parameters
6973
----------
70-
input_bytes (bytes): input bytes to hash
74+
file (BinaryIO)
75+
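file-like object backed by a real OS file descriptor (the descriptor is memory-mapped, so in-memory streams are not supported)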
7176
72-
seed (int): seed for the hash function
77+
seed (int)
78+
seed for the hash function
7379
7480
7581
Returns
7682
-------
77-
hash (int): u64 hash of the input bytes
83+
hash (int)
84+
u64 hash of the input bytes
7885
7986
8087
Example
8188
-------
8289
```python
83-
import gxhash
84-
85-
input_bytes = bytes([42] * 1000)
90+
file = BytesIO(bytes([42] * 1000))
8691
seed = 1234
87-
print(f"Hash is {gxhash.gxhash64(input_bytes, seed)}!")
92+
print(f"Hash is {gxhash.gxhash64(file, seed)}!")
8893
```
8994
"""
9095

91-
def gxhash64_nogil(input_bytes: bytes, seed: int) -> int:
96+
async def gxhash64_async(file: BinaryIO, seed: int) -> int:
9297
"""
9398
Summary
9499
-------
@@ -97,28 +102,29 @@ def gxhash64_nogil(input_bytes: bytes, seed: int) -> int:
97102
98103
Parameters
99104
----------
100-
input_bytes (bytes): input bytes to hash
105+
file (BinaryIO)
106+
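file-like object backed by a real OS file descriptor (the descriptor is memory-mapped, so in-memory streams are not supported)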
101107
102-
seed (int): seed for the hash function
108+
seed (int)
109+
seed for the hash function
103110
104111
105112
Returns
106113
-------
107-
hash (int): u64 hash of the input bytes
114+
hash (Awaitable[int])
115+
u64 hash of the input bytes
108116
109117
110118
Example
111119
-------
112120
```python
113-
import gxhash
114-
115-
input_bytes = bytes([42] * 1000)
121+
file = BytesIO(bytes([42] * 1000))
116122
seed = 1234
117-
print(f"Hash is {gxhash.gxhash64_nogil(input_bytes, seed)}!")
123+
print(f"Hash is {await gxhash.gxhash64_async(file, seed)}!")
118124
```
119125
"""
120126

121-
def gxhash128(input_bytes: bytes, seed: int) -> int:
127+
def gxhash128(file: BinaryIO, seed: int) -> int:
122128
"""
123129
Summary
124130
-------
@@ -127,28 +133,29 @@ def gxhash128(input_bytes: bytes, seed: int) -> int:
127133
128134
Parameters
129135
----------
130-
input_bytes (bytes): input bytes to hash
136+
file (BinaryIO)
137+
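file-like object backed by a real OS file descriptor (the descriptor is memory-mapped, so in-memory streams are not supported)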
131138
132-
seed (int): seed for the hash function
139+
seed (int)
140+
seed for the hash function
133141
134142
135143
Returns
136144
-------
137-
hash (int): u128 hash of the input bytes
145+
hash (int)
146+
u128 hash of the input bytes
138147
139148
140149
Example
141150
-------
142151
```python
143-
import gxhash
144-
145-
input_bytes = bytes([42] * 1000)
152+
file = BytesIO(bytes([42] * 1000))
146153
seed = 1234
147-
print(f"Hash is {gxhash.gxhash128(input_bytes, seed)}!")
154+
print(f"Hash is {gxhash.gxhash128(file, seed)}!")
148155
```
149156
"""
150157

151-
def gxhash128_nogil(input_bytes: bytes, seed: int) -> int:
158+
async def gxhash128_async(file: BinaryIO, seed: int) -> int:
152159
"""
153160
Summary
154161
-------
@@ -157,23 +164,24 @@ def gxhash128_nogil(input_bytes: bytes, seed: int) -> int:
157164
158165
Parameters
159166
----------
160-
input_bytes (bytes): input bytes to hash
167+
file (BinaryIO)
168+
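file-like object backed by a real OS file descriptor (the descriptor is memory-mapped, so in-memory streams are not supported)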
161169
162-
seed (int): seed for the hash function
170+
seed (int)
171+
seed for the hash function
163172
164173
165174
Returns
166175
-------
167-
hash (int): u128 hash of the input bytes
176+
hash (Awaitable[int])
177+
u128 hash of the input bytes
168178
169179
170180
Example
171181
-------
172182
```python
173-
import gxhash
174-
175-
input_bytes = bytes([42] * 1000)
183+
file = BytesIO(bytes([42] * 1000))
176184
seed = 1234
177-
print(f"Hash is {gxhash.gxhash128_nogil(input_bytes, seed)}!")
185+
print(f"Hash is {await gxhash.gxhash128_async(file, seed)}!")
178186
```
179187
"""

py-gxhash/src/lib.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,61 @@
11
use pyo3::prelude::*;
2+
use pyo3_async_runtimes::tokio::future_into_py;
3+
use std::os::fd::FromRawFd;
4+
5+
fn get_file_descriptor(py: Python, file: PyObject) -> Result<i32, PyErr> {
6+
file.call_method0(py, pyo3::intern!(py, "foo"))?.extract(py)
7+
}
8+
9+
fn gxhash<T>(hasher: fn(&[u8], i64) -> T, file_descriptor: i32, seed: i64) -> PyResult<T> {
10+
let file = unsafe { std::fs::File::from_raw_fd(libc::dup(file_descriptor)) };
11+
let mmap = unsafe { memmap2::Mmap::map(&file).unwrap() };
12+
drop(file);
13+
Ok(hasher(&mmap, seed))
14+
}
215

316
#[pyfunction]
4-
fn gxhash32(input_bytes: &[u8], seed: i64) -> PyResult<u32> {
5-
Ok(gxhash::gxhash32(input_bytes, seed))
17+
fn gxhash32(py: Python, file: PyObject, seed: i64) -> PyResult<u32> {
18+
let file_descriptor = get_file_descriptor(py, file)?;
19+
gxhash(gxhash::gxhash32, file_descriptor, seed)
620
}
721

822
#[pyfunction]
9-
fn gxhash32_nogil(py: Python, input_bytes: &[u8], seed: i64) -> PyResult<u32> {
10-
py.allow_threads(|| Ok(gxhash::gxhash32(input_bytes, seed)))
23+
fn gxhash32_async(py: Python, file: PyObject, seed: i64) -> PyResult<Bound<PyAny>> {
24+
let file_descriptor = get_file_descriptor(py, file)?;
25+
future_into_py(py, async move { gxhash(gxhash::gxhash32, file_descriptor, seed) })
1126
}
1227

1328
#[pyfunction]
14-
fn gxhash64(input_bytes: &[u8], seed: i64) -> PyResult<u64> {
15-
Ok(gxhash::gxhash64(input_bytes, seed))
29+
fn gxhash64(py: Python, file: PyObject, seed: i64) -> PyResult<u64> {
30+
let file_descriptor = get_file_descriptor(py, file)?;
31+
gxhash(gxhash::gxhash64, file_descriptor, seed)
1632
}
1733

1834
#[pyfunction]
19-
fn gxhash64_nogil(py: Python, input_bytes: &[u8], seed: i64) -> PyResult<u64> {
20-
py.allow_threads(|| Ok(gxhash::gxhash64(input_bytes, seed)))
35+
fn gxhash64_async(py: Python, file: PyObject, seed: i64) -> PyResult<Bound<PyAny>> {
36+
let file_descriptor = get_file_descriptor(py, file)?;
37+
future_into_py(py, async move { gxhash(gxhash::gxhash64, file_descriptor, seed) })
2138
}
2239

2340
#[pyfunction]
24-
fn gxhash128(input_bytes: &[u8], seed: i64) -> PyResult<u128> {
25-
Ok(gxhash::gxhash128(input_bytes, seed))
41+
fn gxhash128(py: Python, file: PyObject, seed: i64) -> PyResult<u128> {
42+
let file_descriptor = get_file_descriptor(py, file)?;
43+
gxhash(gxhash::gxhash128, file_descriptor, seed)
2644
}
2745

2846
#[pyfunction]
29-
fn gxhash128_nogil(py: Python, input_bytes: &[u8], seed: i64) -> PyResult<u128> {
30-
py.allow_threads(|| Ok(gxhash::gxhash128(input_bytes, seed)))
47+
fn gxhash128_async(py: Python, file: PyObject, seed: i64) -> PyResult<Bound<PyAny>> {
48+
let file_descriptor = get_file_descriptor(py, file)?;
49+
future_into_py(py, async move { gxhash(gxhash::gxhash128, file_descriptor, seed) })
3150
}
3251

3352
#[pymodule(name = "gxhash")]
3453
fn pygxhash(m: &Bound<'_, PyModule>) -> PyResult<()> {
3554
m.add_function(wrap_pyfunction!(gxhash32, m)?)?;
36-
m.add_function(wrap_pyfunction!(gxhash32_nogil, m)?)?;
55+
m.add_function(wrap_pyfunction!(gxhash32_async, m)?)?;
3756
m.add_function(wrap_pyfunction!(gxhash64, m)?)?;
38-
m.add_function(wrap_pyfunction!(gxhash64_nogil, m)?)?;
57+
m.add_function(wrap_pyfunction!(gxhash64_async, m)?)?;
3958
m.add_function(wrap_pyfunction!(gxhash128, m)?)?;
40-
m.add_function(wrap_pyfunction!(gxhash128_nogil, m)?)?;
59+
m.add_function(wrap_pyfunction!(gxhash128_async, m)?)?;
4160
Ok(())
4261
}

0 commit comments

Comments
 (0)