Skip to content

Commit fe6199b

Browse files
committed
fix
1 parent 10dd416 commit fe6199b

1 file changed

Lines changed: 68 additions & 5 deletions

File tree

  • python/functionstream-api/src/fs_api/store/codec

python/functionstream-api/src/fs_api/store/codec/int_codec.py

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,82 @@
1515
from .base import Codec
1616

1717

18+
# Sign marker written as the first byte of every variable-length payload.
# 0x00 (negative) compares below 0x80 (non-negative), so under byte-wise
# comparison every negative key sorts before every non-negative key.
_SIGN_NEG = 0x00
_SIGN_NONNEG = 0x80
# Length in bytes of the legacy fixed-width format. decode() treats any payload
# of exactly this length as legacy, for backward compatibility.
_LEGACY_LEN = 8
1823
class IntCodec(Codec[int]):
    """Ordered int codec for range scans (lexicographic byte order).

    Supports arbitrary-precision Python int (32-bit, 64-bit, and larger) as
    long as the magnitude fits in 255 bytes.

    Variable-length wire format (``n`` = number of magnitude bytes)::

        non-negative v:  0x80, n,        v as n big-endian bytes
        negative v:      0x00, 255 - n,  (256**n - 1 - m) as n big-endian bytes
                         where m = -(v + 1)

    For negative values both the length byte and the magnitude are stored
    complemented: a more negative number needs more magnitude bytes, so the
    length must count *down* for it to sort first.

    Decoding accepts three layouts:

    * the legacy fixed 8-byte format (every payload of exactly 8 bytes),
    * the variable-length format described above,
    * variable-length negatives written with a plain (non-complemented)
      length byte by an earlier revision of this codec. Those keys do not
      sort correctly across different widths, but they still decode to the
      right value.

    Exactly 8-byte payloads are always decoded as legacy; variable encoding
    never produces 8 bytes.
    """

    supports_ordered_keys = True

    # Largest magnitude width that fits in the single length byte.
    _MAX_MAG_BYTES = 0xFF

    def encode(self, value: int) -> bytes:
        """Encode *value* so that byte-wise order equals numeric order.

        Raises:
            TypeError: if *value* is not an ``int``.
            OverflowError: if the magnitude needs more than 255 bytes.
        """
        if not isinstance(value, int):
            raise TypeError(f"expected int, got {type(value).__name__}")

        negative = value < 0
        # Negatives are shifted so that -1 -> 0, -2 -> 1, ...
        mag = -(value + 1) if negative else value

        n_bytes = max(1, (mag.bit_length() + 7) // 8)
        if n_bytes > self._MAX_MAG_BYTES:
            raise OverflowError(
                f"int magnitude requires {n_bytes} bytes; at most 255 bytes supported"
            )

        # Avoid exactly 8 bytes so decode can treat all 8-byte payloads as
        # legacy: widen a 6-byte magnitude to 7 bytes. The extra leading byte
        # (0x00, or 0xFF once complemented) keeps these values ordered
        # correctly against genuine 7-byte magnitudes.
        if 2 + n_bytes == _LEGACY_LEN:
            n_bytes += 1

        if negative:
            # Complement the magnitude AND the length byte so that larger
            # magnitudes (more negative values) compare lower.
            stored = ((1 << (8 * n_bytes)) - 1) - mag
            header = bytes([_SIGN_NEG, self._MAX_MAG_BYTES - n_bytes])
            return header + stored.to_bytes(n_bytes, "big")

        return bytes([_SIGN_NONNEG, n_bytes]) + mag.to_bytes(n_bytes, "big")

    def decode(self, data: bytes) -> int:
        """Decode a payload produced by :meth:`encode` or by the legacy codec.

        Raises:
            ValueError: if the payload is malformed.
        """
        if len(data) == _LEGACY_LEN:
            return self._decode_legacy(data)
        return self._decode_variable(data)

    def _decode_legacy(self, data: bytes) -> int:
        """Decode legacy fixed 8-byte format for backward compatibility."""
        mapped = struct.unpack(">Q", data)[0]
        # Undo the sign-bit flip, then reinterpret as signed 64-bit.
        raw = mapped ^ (1 << 63)
        if raw >= (1 << 63):
            return raw - (1 << 64)
        return raw

    def _decode_variable(self, data: bytes) -> int:
        """Decode the variable-length format (current and earlier revision).

        Raises:
            ValueError: if the payload is too short, its length byte does not
                match the payload size, or the sign byte is unknown.
        """
        if len(data) < 2:
            raise ValueError("invalid int payload: too short (need at least 2 bytes)")

        sign_byte, len_byte = data[0], data[1]
        payload_len = len(data) - 2

        if sign_byte == _SIGN_NEG and len_byte == self._MAX_MAG_BYTES - payload_len:
            # Current format: negatives carry a complemented length byte.
            n_bytes = payload_len
        else:
            # Non-negative values, and negatives written by the earlier
            # revision, carry the plain length. The two negative forms cannot
            # be confused: n == 255 - n has no integer solution.
            n_bytes = len_byte

        if payload_len != n_bytes:
            raise ValueError(
                f"invalid int payload length: expected 2 + {n_bytes} = {2 + n_bytes}, got {len(data)}"
            )

        mag = int.from_bytes(data[2:], "big")
        if sign_byte == _SIGN_NEG:
            # Undo the complement, then the -(m + 1) shift.
            max_mag = (1 << (8 * n_bytes)) - 1
            return -((max_mag - mag) + 1)
        if sign_byte == _SIGN_NONNEG:
            return mag
        raise ValueError(f"invalid int payload: unknown sign byte 0x{sign_byte:02x}")

0 commit comments

Comments
 (0)