Skip to content

Commit 1150b61

Browse files
committed
Xor fuse filter
1 parent 067b91c commit 1150b61

File tree

4 files changed

+197
-3
lines changed

4 files changed

+197
-3
lines changed
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
package org.fastfilter.xor;
2+
3+
import org.fastfilter.Filter;
4+
import org.fastfilter.utils.Hash;
5+
6+
/**
7+
* The Xor Fuse Filter, a new algorithm that can replace a bloom filter.
8+
*
9+
* It is related to the BDZ algorithm [1] (a minimal perfect hash function
10+
* algorithm).
11+
*
12+
* [1] paper: Simple and Space-Efficient Minimal Perfect Hash Functions -
13+
* http://cmph.sourceforge.net/papers/wads07.pdf
14+
*/
15+
public class XorFuse8 implements Filter {
16+
17+
private static final int BITS_PER_FINGERPRINT = 8;
18+
private static final int HASHES = 3;
19+
20+
private static final int FUSE_ARITY = 3;
21+
private static final int FUSE_SEGMENT_COUNT = 100;
22+
private static final int FUSE_SLOTS = FUSE_SEGMENT_COUNT + FUSE_ARITY - 1;
23+
24+
private final int size;
25+
private final int segmentLength;
26+
private final int arrayLength;
27+
private long seed;
28+
private byte[] fingerprints;
29+
private final int bitCount;
30+
31+
public long getBitCount() {
32+
return bitCount;
33+
}
34+
35+
private static int getArrayLength(int size, double factor) {
36+
int capacity = (int) (1.0 / factor * size);
37+
capacity = (capacity + FUSE_SLOTS - 1) / FUSE_SLOTS * FUSE_SLOTS;
38+
return capacity;
39+
}
40+
41+
public static XorFuse8 construct(long[] keys) {
42+
int size = keys.length;
43+
double factor = 0.879;
44+
if (size < 1_000) {
45+
factor = 0.5;
46+
} else if (size < 10_000) {
47+
factor = 0.7;
48+
} else if (size < 100_000) {
49+
factor = 0.8;
50+
}
51+
while (true) {
52+
try {
53+
return new XorFuse8(keys, factor);
54+
} catch (UnsupportedOperationException e) {
55+
// try again with a lower load
56+
factor -= 0.1;
57+
}
58+
}
59+
}
60+
61+
public XorFuse8(long[] keys, double factor) {
62+
this.size = keys.length;
63+
arrayLength = getArrayLength(size, factor);
64+
segmentLength = arrayLength / FUSE_SLOTS;
65+
bitCount = arrayLength * BITS_PER_FINGERPRINT;
66+
int m = arrayLength;
67+
long[] reverseOrder = new long[size];
68+
byte[] reverseH = new byte[size];
69+
int reverseOrderPos;
70+
long seed;
71+
int x = 0;
72+
do {
73+
x++;
74+
if (x > 10) {
75+
throw new UnsupportedOperationException();
76+
}
77+
seed = Hash.randomSeed();
78+
byte[] t2count = new byte[m];
79+
long[] t2 = new long[m];
80+
for (long k : keys) {
81+
for (int hi = 0; hi < HASHES; hi++) {
82+
int h = getHash(k, seed, hi);
83+
t2[h] ^= k;
84+
if (t2count[h] > 120) {
85+
// probably something wrong with the hash function
86+
throw new IllegalArgumentException();
87+
}
88+
t2count[h]++;
89+
}
90+
}
91+
reverseOrderPos = 0;
92+
int[] alone = new int[arrayLength];
93+
int alonePos = 0;
94+
for (int i = 0; i < arrayLength; i++) {
95+
if (t2count[ i] == 1) {
96+
alone[alonePos++] = i;
97+
}
98+
}
99+
int found = -1;
100+
while (alonePos > 0) {
101+
int i = alone[--alonePos];
102+
if (t2count[i] <= 0) {
103+
continue;
104+
}
105+
if (t2count[i] != 1) {
106+
throw new AssertionError();
107+
}
108+
--t2count[i];
109+
long k = t2[i];
110+
for (int hi = 0; hi < HASHES; hi++) {
111+
int h = getHash(k, seed, hi);
112+
int newCount = --t2count[h];
113+
if (h == i) {
114+
found = hi;
115+
} else {
116+
if (newCount == 1) {
117+
alone[alonePos++] = h;
118+
}
119+
t2[h] ^= k;
120+
}
121+
}
122+
reverseOrder[reverseOrderPos] = k;
123+
reverseH[reverseOrderPos] = (byte) found;
124+
reverseOrderPos++;
125+
}
126+
} while (reverseOrderPos != size);
127+
this.seed = seed;
128+
byte[] fp = new byte[m];
129+
for (int i = reverseOrderPos - 1; i >= 0; i--) {
130+
long k = reverseOrder[i];
131+
int found = reverseH[i];
132+
int change = -1;
133+
long hash = Hash.hash64(k, seed);
134+
int xor = fingerprint(hash);
135+
for (int hi = 0; hi < HASHES; hi++) {
136+
int h = getHash(k, seed, hi);
137+
if (found == hi) {
138+
change = h;
139+
} else {
140+
xor ^= fp[h];
141+
}
142+
}
143+
fp[change] = (byte) xor;
144+
}
145+
fingerprints = new byte[m];
146+
System.arraycopy(fp, 0, fingerprints, 0, fp.length);
147+
}
148+
149+
@Override
150+
public boolean mayContain(long key) {
151+
long hash = Hash.hash64(key, seed);
152+
int f = fingerprint(hash);
153+
int r0 = (int) ((0xBF58476D1CE4E5B9L * hash) >> 32);
154+
int r1 = (int) hash;
155+
int r2 = (int) Long.rotateLeft(hash, 21);
156+
int r3 = (int) Long.rotateLeft(hash, 42);
157+
int seg = Hash.reduce(r0, FUSE_SEGMENT_COUNT);
158+
int h0 = (seg + 0) * segmentLength + Hash.reduce(r1, segmentLength);
159+
int h1 = (seg + 1) * segmentLength + Hash.reduce(r2, segmentLength);
160+
int h2 = (seg + 2) * segmentLength + Hash.reduce(r3, segmentLength);
161+
f ^= fingerprints[h0] ^ fingerprints[h1] ^ fingerprints[h2];
162+
return (f & 0xff) == 0;
163+
}
164+
165+
private int getHash(long key, long seed, int index) {
166+
long hash = Hash.hash64(key, seed);
167+
int r0 = (int) ((0xBF58476D1CE4E5B9L * hash) >> 32);
168+
int seg = Hash.reduce(r0, FUSE_SEGMENT_COUNT);
169+
int r = (int) Long.rotateLeft(hash, 21 * index);
170+
return (seg + index) * segmentLength + Hash.reduce(r, segmentLength);
171+
}
172+
173+
private int fingerprint(long hash) {
174+
return (int) (hash & ((1 << BITS_PER_FINGERPRINT) - 1));
175+
}
176+
177+
}

fastfilter/src/test/java/org/fastfilter/TestAllFilters.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ public class TestAllFilters {
8787

8888
public static void main(String... args) {
8989
Hash.setSeed(1);
90+
/*
9091
for (int size = 1_000_000; size <= 10_000_000; size *= 10) {
9192
System.out.println("size " + size);
9293
for (int test = 0; test < 10; test++) {
@@ -97,7 +98,15 @@ public static void main(String... args) {
9798
test(TestFilterType.SUCCINCT_COUNTING_BLOOM_RANKED, size, test, true);
9899
}
99100
}
100-
101+
*/
102+
for (int size = 1; size <= 100; size++) {
103+
System.out.println("size " + size);
104+
test(TestFilterType.XOR_FUSE_8, size, 0, true);
105+
}
106+
for (int size = 100; size <= 100000; size *= 1.1) {
107+
System.out.println("size " + size);
108+
test(TestFilterType.XOR_FUSE_8, size, 0, true);
109+
}
101110
for (int size = 1_000_000; size <= 8_000_000; size *= 2) {
102111
System.out.println("size " + size);
103112
testAll(size, true);

fastfilter/src/test/java/org/fastfilter/TestFilterType.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.fastfilter.mphf.MPHFilter;
1313
import org.fastfilter.xor.Xor16;
1414
import org.fastfilter.xor.Xor8;
15+
import org.fastfilter.xor.XorFuse8;
1516
import org.fastfilter.xor.XorSimple;
1617
import org.fastfilter.xor.XorSimple2;
1718
import org.fastfilter.xorplus.XorPlus8;
@@ -92,6 +93,12 @@ public Filter construct(long[] keys, int setting) {
9293
return XorPlus8.construct(keys);
9394
}
9495
},
96+
XOR_FUSE_8 {
97+
@Override
98+
public Filter construct(long[] keys, int setting) {
99+
return XorFuse8.construct(keys);
100+
}
101+
},
95102
CUCKOO_8 {
96103
@Override
97104
public Filter construct(long[] keys, int setting) {

fastfilter/src/test/java/org/fastfilter/xor/ProbabilityFuse.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,14 @@ public static void main(String... args) {
3232
}
3333
}
3434
p = 1.0 * successCount / testCount;
35-
if (p < 0.6 && factor > 0.1) {
35+
double minP = 0.01;
36+
if (p < minP && factor > 0.1) {
3637
factor -= change;
3738
if (lastDirection != -1) {
3839
lastDirection = -1;
3940
change = change / 2;
4041
}
41-
} else if (p > 0.61) {
42+
} else if (p > minP * 1.1) {
4243
if (change < 0.0001) {
4344
break;
4445
}

0 commit comments

Comments
 (0)