Skip to content

Commit c9d7469

Browse files
committed
feat/perf: add performant async implementation
1 parent be0e854 commit c9d7469

File tree

4 files changed

+111
-66
lines changed

4 files changed

+111
-66
lines changed

py-gxhash/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,9 @@ name = "gxhash"
1313
crate-type = ["cdylib"]
1414

1515
[dependencies]
16-
pyo3 = "0.22.0"
16+
pyo3 = "0.24.0"
1717
gxhash = { path = "..", features = ["hybrid"], default-features = false }
18+
pyo3-async-runtimes = { version = "0.24.0", features = ["tokio-runtime"] }
19+
tokio = "1.44.1"
20+
memmap2 = "0.9.5"
21+
libc = "0.2.171"

py-gxhash/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# py-gxhash
22

33
```bash
4-
uv venv --seed
5-
uv run maturin develop
4+
uv sync --reinstall
65
```

py-gxhash/gxhash.pyi

Lines changed: 71 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
def gxhash32(input_bytes: bytes, seed: int) -> int:
1+
from typing import Protocol
2+
3+
class File(Protocol):
4+
def fileno(self) -> int: ...
5+
6+
def gxhash32(file: File, seed: int) -> int:
27
"""
38
Summary
49
-------
@@ -7,28 +12,31 @@ def gxhash32(input_bytes: bytes, seed: int) -> int:
712
813
Parameters
914
----------
10-
input_bytes (bytes): input bytes to hash
15+
file (File)
16+
file-like object
1117
12-
seed (int): seed for the hash function
18+
seed (int)
19+
seed for the hash function
1320
1421
1522
Returns
1623
-------
17-
hash (int): u32 hash of the input bytes
24+
hash (int)
25+
u32 hash of the input bytes
1826
1927
2028
Example
2129
-------
2230
```python
23-
import gxhash
24-
25-
input_bytes = bytes([42] * 1000)
31+
file = TemporaryFile()
32+
file.write(bytes([42] * 1000))
33+
file.seek(0)
2634
seed = 1234
27-
print(f"Hash is {gxhash.gxhash32(input_bytes, seed)}!")
35+
print(f"Hash is {gxhash.gxhash32(file, seed)}!")
2836
```
2937
"""
3038

31-
def gxhash32_nogil(input_bytes: bytes, seed: int) -> int:
39+
async def gxhash32_async(file: File, seed: int) -> int:
3240
"""
3341
Summary
3442
-------
@@ -37,28 +45,31 @@ def gxhash32_nogil(input_bytes: bytes, seed: int) -> int:
3745
3846
Parameters
3947
----------
40-
input_bytes (bytes): input bytes to hash
48+
file (File)
49+
file-like object
4150
42-
seed (int): seed for the hash function
51+
seed (int)
52+
seed for the hash function
4353
4454
4555
Returns
4656
-------
47-
hash (int): u32 hash of the input bytes
57+
hash (Awaitable[int])
58+
u32 hash of the input bytes
4859
4960
5061
Example
5162
-------
5263
```python
53-
import gxhash
54-
55-
input_bytes = bytes([42] * 1000)
64+
file = TemporaryFile()
65+
file.write(bytes([42] * 1000))
66+
file.seek(0)
5667
seed = 1234
57-
print(f"Hash is {gxhash.gxhash32_nogil(input_bytes, seed)}!")
68+
print(f"Hash is {await gxhash.gxhash32_async(file, seed)}!")
5869
```
5970
"""
6071

61-
def gxhash64(input_bytes: bytes, seed: int) -> int:
72+
def gxhash64(file: File, seed: int) -> int:
6273
"""
6374
Summary
6475
-------
@@ -67,28 +78,31 @@ def gxhash64(input_bytes: bytes, seed: int) -> int:
6778
6879
Parameters
6980
----------
70-
input_bytes (bytes): input bytes to hash
81+
file (File)
82+
file-like object
7183
72-
seed (int): seed for the hash function
84+
seed (int)
85+
seed for the hash function
7386
7487
7588
Returns
7689
-------
77-
hash (int): u64 hash of the input bytes
90+
hash (int)
91+
u64 hash of the input bytes
7892
7993
8094
Example
8195
-------
8296
```python
83-
import gxhash
84-
85-
input_bytes = bytes([42] * 1000)
97+
file = TemporaryFile()
98+
file.write(bytes([42] * 1000))
99+
file.seek(0)
86100
seed = 1234
87-
print(f"Hash is {gxhash.gxhash64(input_bytes, seed)}!")
101+
print(f"Hash is {gxhash.gxhash64(file, seed)}!")
88102
```
89103
"""
90104

91-
def gxhash64_nogil(input_bytes: bytes, seed: int) -> int:
105+
async def gxhash64_async(file: File, seed: int) -> int:
92106
"""
93107
Summary
94108
-------
@@ -97,28 +111,31 @@ def gxhash64_nogil(input_bytes: bytes, seed: int) -> int:
97111
98112
Parameters
99113
----------
100-
input_bytes (bytes): input bytes to hash
114+
file (File)
115+
file-like object
101116
102-
seed (int): seed for the hash function
117+
seed (int)
118+
seed for the hash function
103119
104120
105121
Returns
106122
-------
107-
hash (int): u64 hash of the input bytes
123+
hash (Awaitable[int])
124+
u64 hash of the input bytes
108125
109126
110127
Example
111128
-------
112129
```python
113-
import gxhash
114-
115-
input_bytes = bytes([42] * 1000)
130+
file = TemporaryFile()
131+
file.write(bytes([42] * 1000))
132+
file.seek(0)
116133
seed = 1234
117-
print(f"Hash is {gxhash.gxhash64_nogil(input_bytes, seed)}!")
134+
print(f"Hash is {await gxhash.gxhash64_async(file, seed)}!")
118135
```
119136
"""
120137

121-
def gxhash128(input_bytes: bytes, seed: int) -> int:
138+
def gxhash128(file: File, seed: int) -> int:
122139
"""
123140
Summary
124141
-------
@@ -127,28 +144,31 @@ def gxhash128(input_bytes: bytes, seed: int) -> int:
127144
128145
Parameters
129146
----------
130-
input_bytes (bytes): input bytes to hash
147+
file (File)
148+
file-like object
131149
132-
seed (int): seed for the hash function
150+
seed (int)
151+
seed for the hash function
133152
134153
135154
Returns
136155
-------
137-
hash (int): u128 hash of the input bytes
156+
hash (int)
157+
u128 hash of the input bytes
138158
139159
140160
Example
141161
-------
142162
```python
143-
import gxhash
144-
145-
input_bytes = bytes([42] * 1000)
163+
file = TemporaryFile()
164+
file.write(bytes([42] * 1000))
165+
file.seek(0)
146166
seed = 1234
147-
print(f"Hash is {gxhash.gxhash128(input_bytes, seed)}!")
167+
print(f"Hash is {gxhash.gxhash128(file, seed)}!")
148168
```
149169
"""
150170

151-
def gxhash128_nogil(input_bytes: bytes, seed: int) -> int:
171+
async def gxhash128_async(file: File, seed: int) -> int:
152172
"""
153173
Summary
154174
-------
@@ -157,23 +177,26 @@ def gxhash128_nogil(input_bytes: bytes, seed: int) -> int:
157177
158178
Parameters
159179
----------
160-
input_bytes (bytes): input bytes to hash
180+
file (File)
181+
file-like object
161182
162-
seed (int): seed for the hash function
183+
seed (int)
184+
seed for the hash function
163185
164186
165187
Returns
166188
-------
167-
hash (int): u128 hash of the input bytes
189+
hash (Awaitable[int])
190+
u128 hash of the input bytes
168191
169192
170193
Example
171194
-------
172195
```python
173-
import gxhash
174-
175-
input_bytes = bytes([42] * 1000)
196+
file = TemporaryFile()
197+
file.write(bytes([42] * 1000))
198+
file.seek(0)
176199
seed = 1234
177-
print(f"Hash is {gxhash.gxhash128_nogil(input_bytes, seed)}!")
200+
print(f"Hash is {await gxhash.gxhash128_async(file, seed)}!")
178201
```
179202
"""

py-gxhash/src/lib.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,61 @@
11
use pyo3::prelude::*;
2+
use pyo3_async_runtimes::tokio::future_into_py;
3+
use std::os::fd::FromRawFd;
4+
5+
fn get_file_descriptor(py: Python, file: PyObject) -> Result<i32, PyErr> {
6+
file.call_method0(py, pyo3::intern!(py, "fileno"))?.extract(py)
7+
}
8+
9+
fn gxhash<T>(hasher: fn(&[u8], i64) -> T, file_descriptor: i32, seed: i64) -> PyResult<T> {
10+
let file = unsafe { std::fs::File::from_raw_fd(libc::dup(file_descriptor)) };
11+
let mmap = unsafe { memmap2::Mmap::map(&file).unwrap() };
12+
drop(file);
13+
Ok(hasher(&mmap, seed))
14+
}
215

316
#[pyfunction]
4-
fn gxhash32(input_bytes: &[u8], seed: i64) -> PyResult<u32> {
5-
Ok(gxhash::gxhash32(input_bytes, seed))
17+
fn gxhash32(py: Python, file: PyObject, seed: i64) -> PyResult<u32> {
18+
let file_descriptor = get_file_descriptor(py, file)?;
19+
gxhash(gxhash::gxhash32, file_descriptor, seed)
620
}
721

822
#[pyfunction]
9-
fn gxhash32_nogil(py: Python, input_bytes: &[u8], seed: i64) -> PyResult<u32> {
10-
py.allow_threads(|| Ok(gxhash::gxhash32(input_bytes, seed)))
23+
fn gxhash32_async(py: Python, file: PyObject, seed: i64) -> PyResult<Bound<PyAny>> {
24+
let file_descriptor = get_file_descriptor(py, file)?;
25+
future_into_py(py, async move { gxhash(gxhash::gxhash32, file_descriptor, seed) })
1126
}
1227

1328
#[pyfunction]
14-
fn gxhash64(input_bytes: &[u8], seed: i64) -> PyResult<u64> {
15-
Ok(gxhash::gxhash64(input_bytes, seed))
29+
fn gxhash64(py: Python, file: PyObject, seed: i64) -> PyResult<u64> {
30+
let file_descriptor = get_file_descriptor(py, file)?;
31+
gxhash(gxhash::gxhash64, file_descriptor, seed)
1632
}
1733

1834
#[pyfunction]
19-
fn gxhash64_nogil(py: Python, input_bytes: &[u8], seed: i64) -> PyResult<u64> {
20-
py.allow_threads(|| Ok(gxhash::gxhash64(input_bytes, seed)))
35+
fn gxhash64_async(py: Python, file: PyObject, seed: i64) -> PyResult<Bound<PyAny>> {
36+
let file_descriptor = get_file_descriptor(py, file)?;
37+
future_into_py(py, async move { gxhash(gxhash::gxhash64, file_descriptor, seed) })
2138
}
2239

2340
#[pyfunction]
24-
fn gxhash128(input_bytes: &[u8], seed: i64) -> PyResult<u128> {
25-
Ok(gxhash::gxhash128(input_bytes, seed))
41+
fn gxhash128(py: Python, file: PyObject, seed: i64) -> PyResult<u128> {
42+
let file_descriptor = get_file_descriptor(py, file)?;
43+
gxhash(gxhash::gxhash128, file_descriptor, seed)
2644
}
2745

2846
#[pyfunction]
29-
fn gxhash128_nogil(py: Python, input_bytes: &[u8], seed: i64) -> PyResult<u128> {
30-
py.allow_threads(|| Ok(gxhash::gxhash128(input_bytes, seed)))
47+
fn gxhash128_async(py: Python, file: PyObject, seed: i64) -> PyResult<Bound<PyAny>> {
48+
let file_descriptor = get_file_descriptor(py, file)?;
49+
future_into_py(py, async move { gxhash(gxhash::gxhash128, file_descriptor, seed) })
3150
}
3251

3352
#[pymodule(name = "gxhash")]
3453
fn pygxhash(m: &Bound<'_, PyModule>) -> PyResult<()> {
3554
m.add_function(wrap_pyfunction!(gxhash32, m)?)?;
36-
m.add_function(wrap_pyfunction!(gxhash32_nogil, m)?)?;
55+
m.add_function(wrap_pyfunction!(gxhash32_async, m)?)?;
3756
m.add_function(wrap_pyfunction!(gxhash64, m)?)?;
38-
m.add_function(wrap_pyfunction!(gxhash64_nogil, m)?)?;
57+
m.add_function(wrap_pyfunction!(gxhash64_async, m)?)?;
3958
m.add_function(wrap_pyfunction!(gxhash128, m)?)?;
40-
m.add_function(wrap_pyfunction!(gxhash128_nogil, m)?)?;
59+
m.add_function(wrap_pyfunction!(gxhash128_async, m)?)?;
4160
Ok(())
4261
}

0 commit comments

Comments
 (0)