11// this software is distributed under the MIT License (http://www.opensource.org/licenses/MIT):
2- //
2+ //
33// Copyright 2018-2020, CWI, TU Munich, FSU Jena
4- //
5- // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files
6- // (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify,
7- // merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
4+ //
5+ // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files
6+ // (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify,
7+ // merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
88// furnished to do so, subject to the following conditions:
9- //
9+ //
1010// - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11- //
12- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13- // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
14- // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
15- // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16- //
17- // You can contact the authors via the FSST source repository : https://github.com/cwida/fsst
11+ //
12+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13+ // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
14+ // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
15+ // IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16+ //
17+ // You can contact the authors via the FSST source repository : https://github.com/cwida/fsst
1818#include < algorithm>
1919#include < cassert>
2020#include < cstdint>
@@ -59,7 +59,7 @@ typedef uint64_t u64;
5959
6060// we represent codes in u16 (not u8). 12 bits code (of which 10 are used), 4 bits length
6161#define FSST_LEN_BITS 12 /* shift amount of the 4-bit length field, which sits above the 12 code bits */
62- #define FSST_CODE_BITS 9
62+ #define FSST_CODE_BITS 9
6363#define FSST_CODE_BASE 256UL /* first 256 codes [0,255] are pseudo codes: escaped bytes */
6464#define FSST_CODE_MAX (1UL <<FSST_CODE_BITS) /* 1 << FSST_CODE_BITS: indicating a symbol that has not been assigned a code yet */
6565#define FSST_CODE_MASK (FSST_CODE_MAX-1UL ) /* low FSST_CODE_BITS bits all set: mask that extracts the code part of a u16 */
@@ -115,7 +115,7 @@ struct Symbol {
// returns the low 8 bits of load_num(); asserts that the symbol holds at least one byte
115115 u8 first () const { assert ( length () >= 1 ); return 0xFF & load_num (); }
// returns the low 16 bits of load_num(); asserts that the symbol holds at least two bytes
116116 u16 first2 () const { assert ( length () >= 2 ); return 0xFFFF & load_num (); }
117117
// NOTE(review): 1<<10 = 1024 matches the "Symbol hashtable[1024]" mentioned in the design notes of this file;
// presumably hashTabSize is derived from this value -- its definition is not visible here, confirm.
118- #define FSST_HASH_LOG2SIZE 10
118+ #define FSST_HASH_LOG2SIZE 10
119119#define FSST_HASH_PRIME 2971215073LL
120120#define FSST_SHIFT 15
// FSST_HASH(w): multiply w by FSST_HASH_PRIME, then xor that product with itself shifted right by FSST_SHIFT bits
121121#define FSST_HASH (w ) (((w)*FSST_HASH_PRIME)^(((w)*FSST_HASH_PRIME)>>FSST_SHIFT))
@@ -129,7 +129,7 @@ struct QSymbol{
// two QSymbols compare equal when both the symbol payload (val.num) and the symbol length match
129129 bool operator ==(const QSymbol& other) const { return symbol.val .num == other.symbol .val .num && symbol.length () == other.symbol .length (); }
130130};
131131
132- // we construct FSST symbol tables using a random sample of about 16KB (1<<14)
132+ // we construct FSST symbol tables using a random sample of about 16KB (1<<14)
133133#define FSST_SAMPLETARGET (1 <<14 )
// twice the target, i.e. 32KB (1<<15); presumably the hard cap on the sample size -- confirm at the use site
134134#define FSST_SAMPLEMAXSZ ((long ) 2 *FSST_SAMPLETARGET)
135135
@@ -138,15 +138,15 @@ struct QSymbol{
138138// (1) to encode values we probe (and maintain) three datastructures:
139139// - u16 byteCodes[256] array at the position of the next byte (s.length==1)
140140// - u16 shortCodes[65536] array at the position of the next twobyte pattern (s.length==2)
141- // - Symbol hashtable[1024] (keyed by the next three bytes, ie for s.length>2),
142- // this search will yield a u16 code, it points into Symbol symbols[]. You always find a hit, because the first 256 codes are
141+ // - Symbol hashtable[1024] (keyed by the next three bytes, ie for s.length>2),
142+ // this search will yield a u16 code, it points into Symbol symbols[]. You always find a hit, because the first 256 codes are
143143// pseudo codes representing a single byte (these will become escapes)
144144//
145145// (2) when we finished looking for the best symbol table we call optimize() to reshape it:
146146// - it renumbers the codes by length (first symbols of length 2,3,4,5,6,7,8; then 1: starting from byteLim are the symbols of length 1)
147- // length 2 codes for which no longer suffix symbol exists (< suffixLim) come first among the 2-byte codes
147+ // length 2 codes for which no longer suffix symbol exists (< suffixLim) come first among the 2-byte codes
148148// (allows shortcut during compression)
149- // - for each two-byte combination, in all unused slots of shortCodes[], it enters the byteCode[] of the symbol corresponding
149+ // - for each two-byte combination, in all unused slots of shortCodes[], it enters the byteCode[] of the symbol corresponding
150150// to the first byte (if such a single-byte symbol exists). This allows us to just probe the next two bytes (if there is only one
151151// byte left in the string, there is still a terminator-byte added during compression) in shortCodes[]. That is, byteCodes[]
152152// and its codepath is no longer required. This makes compression faster. The reason we use byteCodes[] during symbolTable construction
@@ -173,9 +173,9 @@ struct SymbolTable {
173173 u16 byteCodes[256 ]; // contains code for every 1-byte symbol, otherwise code for pseudo byte (escaped byte)
174174
175175 // 'symbols' is the current symbol table symbol[code].symbol is the max 8-byte 'symbol' for single-byte 'code'
176- Symbol symbols[FSST_CODE_MAX]; // x in [0,255]: pseudo symbols representing escaped byte x; x in [FSST_CODE_BASE=256,256+nSymbols]: real symbols
176+ Symbol symbols[FSST_CODE_MAX]; // x in [0,255]: pseudo symbols representing escaped byte x; x in [FSST_CODE_BASE=256,256+nSymbols]: real symbols
177177
178- // replicate long symbols in hashTab (avoid indirection).
178+ // replicate long symbols in hashTab (avoid indirection).
179179 Symbol hashTab[hashTabSize]; // used for all symbols of 3 and more bytes
180180
181181 u16 nSymbols; // amount of symbols in the map (max 255)
@@ -225,8 +225,8 @@ struct SymbolTable {
225225 u32 idx = symbols[i].hash () & (hashTabSize-1 );
226226 hashTab[idx].val .num = 0 ;
227227 hashTab[idx].icl = FSST_ICL_FREE; // marks empty in hashtab
228- }
229- }
228+ }
229+ }
230230 nSymbols = 0 ; // no need to clean symbols[] as no symbols are used
231231 }
232232 bool hashInsert (Symbol s) {
@@ -256,11 +256,11 @@ struct SymbolTable {
// Returns the code (already masked with FSST_CODE_MASK) found for the start of s.
// Three structures are probed in order: hashTab[] (one slot, chosen by s.hash()), then shortCodes[] keyed by
// the first two bytes, and finally byteCodes[] keyed by the first byte.
256256 u16 findLongestSymbol (Symbol s) const {
257257 size_t idx = s.hash () & (hashTabSize-1 );
// hit when the slot's icl does not exceed s.icl and the slot's bytes equal the bytes of s after masking s
// with an all-ones 64-bit value shifted right by the low byte of the slot's icl
258258 if (hashTab[idx].icl <= s.icl && hashTab[idx].load_num () == (s.load_num () & (0xFFFFFFFFFFFFFFFF >> ((u8 ) hashTab[idx].icl )))) {
// the code is read from bit 16 upwards of the slot's icl field
259- return (hashTab[idx].icl >>16 ) & FSST_CODE_MASK; // matched a long symbol
259+ return (hashTab[idx].icl >>16 ) & FSST_CODE_MASK; // matched a long symbol
260260 }
261261 if (s.length () >= 2 ) {
262262 u16 code = shortCodes[s.first2 ()] & FSST_CODE_MASK;
// values below FSST_CODE_BASE are the pseudo codes for escaped bytes, so only a larger value is accepted here
263- if (code >= FSST_CODE_BASE) return code;
263+ if (code >= FSST_CODE_BASE) return code;
264264 }
// fallback: byteCodes[] has an entry for every byte value
265265 return byteCodes[s.first ()] & FSST_CODE_MASK;
266266 }
@@ -273,23 +273,23 @@ struct SymbolTable {
273273 // consequently we needed more than 8 bits during symbol table construction, but can simplify the codes to single bytes in finalize()
274274 // (this feature is in fact no longer used, but could still be exploited: symbol construction creates no more than 255 symbols in each pass)
275275 // - we not only reduce the amount of codes to <255, but also *reorder* the symbols and renumber their codes, for higher compression perf.
276- // we renumber codes so they are grouped by length, to allow optimized scalar string compression (byteLim and suffixLim optimizations).
276+ // we renumber codes so they are grouped by length, to allow optimized scalar string compression (byteLim and suffixLim optimizations).
277277 // - we make the use of byteCode[] no longer necessary by inserting single-byte codes in the free spots of shortCodes[]
278278 // Using shortCodes[] only makes compression faster. When creating the symbolTable, however, using shortCodes[] for the single-byte
279279 // symbols is slow, as each insert touches 256 positions in it. This optimization was added when optimizing symbolTable construction time.
280280 //
281281 // In all, we change the layout and coding, as follows..
282282 //
283- // before finalize():
283+ // before finalize():
284284 // - The real symbols are symbols[256..256+nSymbols>. As we may have nSymbols > 255
285285 // - The first 256 codes are pseudo symbols (all escaped bytes)
286286 //
287- // after finalize():
288- // - table layout is symbols[0..nSymbols>, with nSymbols < 256.
289- // - Real codes are [0,nSymbols>. 8-th bit not set.
287+ // after finalize():
288+ // - table layout is symbols[0..nSymbols>, with nSymbols < 256.
289+ // - Real codes are [0,nSymbols>. 8-th bit not set.
290290 // - Escapes in shortCodes have the 8th bit set (value: 256+255=511). 255 because the code to be emitted is the escape byte 255
291291 // - symbols are grouped by length: 2,3,4,5,6,7,8, then 1 (single-byte codes last)
292- // the two-byte codes are split in two sections:
292+ // the two-byte codes are split in two sections:
293293 // - first section contains codes for symbols for which there is no longer symbol (no suffix). It allows an early-out during compression
294294 //
295295 // finally, shortCodes[] is modified to also encode all single-byte symbols (hence byteCodes[] is not required on a critical path anymore).
@@ -298,7 +298,7 @@ struct SymbolTable {
298298 assert (nSymbols <= 255 );
299299 u8 newCode[256 ], rsum[8 ], byteLim = nSymbols - (lenHisto[0 ] - zeroTerminated);
300300
301- // compute running sum of code lengths (starting offsets for each length)
301+ // compute running sum of code lengths (starting offsets for each length)
302302 rsum[0 ] = byteLim; // 1-byte codes are highest
303303 rsum[1 ] = zeroTerminated;
304304 for (u32 i=1 ; i<7 ; i++)
@@ -308,34 +308,34 @@ struct SymbolTable {
308308 suffixLim = rsum[1 ];
309309 symbols[newCode[0 ] = 0 ] = symbols[256 ]; // keep symbol 0 in place (for zeroTerminated cases only)
310310
311- for (u32 i=zeroTerminated, j=rsum[2 ]; i<nSymbols; i++) {
311+ for (u32 i=zeroTerminated, j=rsum[2 ]; i<nSymbols; i++) {
312312 Symbol s1 = symbols[FSST_CODE_BASE+i];
313313 u32 len = s1.length (), opt = (len == 2 )*nSymbols;
314314 if (opt) {
315315 u16 first2 = s1.first2 ();
316- for (u32 k=0 ; k<opt; k++) {
316+ for (u32 k=0 ; k<opt; k++) {
317317 Symbol s2 = symbols[FSST_CODE_BASE+k];
318318 if (k != i && s2.length () > 1 && first2 == s2.first2 ()) // test if symbol k is a suffix of s
319319 opt = 0 ;
320320 }
321- newCode[i] = opt?suffixLim++:--j; // symbols without a larger suffix have a code < suffixLim
322- } else
321+ newCode[i] = opt?suffixLim++:--j; // symbols without a larger suffix have a code < suffixLim
322+ } else
323323 newCode[i] = rsum[len-1 ]++;
324324 s1.set_code_len (newCode[i],len);
325- symbols[newCode[i]] = s1;
325+ symbols[newCode[i]] = s1;
326326 }
327- // renumber the codes in byteCodes[]
328- for (u32 i=0 ; i<256 ; i++)
327+ // renumber the codes in byteCodes[]
328+ for (u32 i=0 ; i<256 ; i++)
329329 if ((byteCodes[i] & FSST_CODE_MASK) >= FSST_CODE_BASE)
330330 byteCodes[i] = newCode[(u8 ) byteCodes[i]] + (1 << FSST_LEN_BITS);
331- else
331+ else
332332 byteCodes[i] = 511 + (1 << FSST_LEN_BITS);
333-
334- // renumber the codes in shortCodes[]
333+
334+ // renumber the codes in shortCodes[]
335335 for (u32 i=0 ; i<65536 ; i++)
336336 if ((shortCodes[i] & FSST_CODE_MASK) >= FSST_CODE_BASE)
337337 shortCodes[i] = newCode[(u8 ) shortCodes[i]] + (shortCodes[i] & (15 << FSST_LEN_BITS));
338- else
338+ else
339339 shortCodes[i] = byteCodes[i&0xFF ];
340340
341341 // replace the symbols in the hash table
@@ -347,22 +347,22 @@ struct SymbolTable {
347347
348348#ifdef NONOPT_FSST
349349struct Counters {
350- u16 count1[FSST_CODE_MAX]; // array to count frequency of symbols as they occur in the sample
351- u16 count2[FSST_CODE_MAX][FSST_CODE_MAX]; // array to count subsequent combinations of two symbols in the sample
350+ u16 count1[FSST_CODE_MAX]; // array to count frequency of symbols as they occur in the sample
351+ u16 count2[FSST_CODE_MAX][FSST_CODE_MAX]; // array to count subsequent combinations of two symbols in the sample
352352
// overwrite the frequency counter stored for code pos1 with val
353- void count1Set (u32 pos1, u16 val) {
353+ void count1Set (u32 pos1, u16 val) {
354354 count1[pos1] = val;
355355 }
// add one to the frequency counter stored for code pos1
356- void count1Inc (u32 pos1) {
356+ void count1Inc (u32 pos1) {
357357 count1[pos1]++;
358358 }
// add one to the counter of the two-symbol combination (pos1, pos2)
359- void count2Inc (u32 pos1, u32 pos2) {
359+ void count2Inc (u32 pos1, u32 pos2) {
360360 count2[pos1][pos2]++;
361361 }
// returns the counter stored for pos1; pos1 is taken by reference but this variant never modifies it
362- u32 count1GetNext (u32 &pos1) {
362+ u32 count1GetNext (u32 &pos1) {
363363 return count1[pos1];
364364 }
// returns the counter stored for the combination (pos1, pos2); pos2 is taken by reference but this variant never modifies it
365- u32 count2GetNext (u32 pos1, u32 &pos2) {
365+ u32 count2GetNext (u32 pos1, u32 &pos2) {
366366 return count2[pos1][pos2];
367367 }
368368 void backup1 (u8 *buf) {
@@ -383,16 +383,16 @@ struct Counters {
383383 u8 count2High[FSST_CODE_MAX][FSST_CODE_MAX/2 ]; // array to count subsequent combinations of two symbols in the sample (12-bits: 8-bits low, 4-bits high)
384384 u8 count2Low[FSST_CODE_MAX][FSST_CODE_MAX]; // its value is (count2High*256+count2Low) -- but high is 4-bits (we put two numbers in one, hence /2)
385385 // 385KB -- but hot area likely just 10 + 30*4 = 130 cache lines (=8KB)
386-
// store val split over two arrays: its low 8 bits go to count1Low[pos1], the bits above them to count1High[pos1]
387- void count1Set (u32 pos1, u16 val) {
386+
387+ void count1Set (u32 pos1, u16 val) {
388388 count1Low[pos1] = val&255 ;
389389 count1High[pos1] = val>>8 ;
390390 }
// increment count1Low[pos1]; when its value before the increment was 0, count1High[pos1] is incremented as well
391- void count1Inc (u32 pos1) {
391+ void count1Inc (u32 pos1) {
392392 if (!count1Low[pos1]++) // increment high early (when low==0, not when low==255). This means (high > 0) <=> (cnt > 0)
393393 count1High[pos1]++; // (0,0)->(1,1)->..->(255,1)->(0,1)->(1,2)->(2,2)->(3,2)..(255,2)->(0,2)->(1,3)->(2,3)...
394394 }
395- void count2Inc (u32 pos1, u32 pos2) {
395+ void count2Inc (u32 pos1, u32 pos2) {
396396 if (!count2Low[pos1][pos2]++) // increment high early (when low==0, not when low==255). This means (high > 0) <=> (cnt > 0)
397397 // inc 4-bits high counter with 1<<0 (1) or 1<<4 (16) -- depending on whether pos2 is even or odd, respectively
398398 count2High[pos1][(pos2)>>1 ] += 1 << (((pos2)&1 )<<2 ); // we take our chances with overflow.. (4K maxval, on a 8K sample)
@@ -432,39 +432,42 @@ struct Counters {
432432 memcpy (count1High, buf, FSST_CODE_MAX);
433433 memcpy (count1Low, buf+FSST_CODE_MAX, FSST_CODE_MAX);
434434 }
435- };
435+ };
436436#endif
437437
438438
439439#define FSST_BUFSZ (3 <<19 ) // 768KB
440440
441- // an encoder is a symbolmap plus some bufferspace, needed during map construction as well as compression
441+ // an encoder is a symbolmap plus some bufferspace, needed during map construction as well as compression
442442struct Encoder {
443443 shared_ptr<SymbolTable> symbolTable; // symbols, plus metadata and data structures for quick compression (shortCode,hashTab, etc)
// anonymous union: counters and simdbuf occupy the same storage, so only one of the two is meaningful at any time
444444 union {
445445 Counters counters; // for counting symbol occurrences during map construction
446- u8 simdbuf[FSST_BUFSZ]; // for compression: SIMD string staging area 768KB = 256KB in + 512KB out (worst case for 256KB in)
446+ u8 simdbuf[FSST_BUFSZ]; // for compression: SIMD string staging area 768KB = 256KB in + 512KB out (worst case for 256KB in)
447447 };
448448};
449449
450450// job control integer representable in one 64bits SIMD lane: cur/end=input, out=output, pos=which string (2^9=512 per call)
451451struct SIMDjob {
// the four bit-fields add up to 19+9+18+18 = 64 bits, i.e. exactly one u64
452- u64 out:19 ,pos:9 ,end:18 ,cur:18 ; // cur/end is input offsets (2^18=256KB), out is output offset (2^19=512KB)
452+ u64 out:19 ,pos:9 ,end:18 ,cur:18 ; // cur/end is input offsets (2^18=256KB), out is output offset (2^19=512KB)
453453};
454454
// declaration only (extern); presumably returns true when AVX512 can be used -- confirm in the implementation
455- extern bool
455+ extern bool
456456fsst_hasAVX512 (); // runtime check for avx512 capability
457457
// declaration only (extern); the meaning of the size_t return value is not documented here -- TODO confirm in the implementation
458- extern size_t
458+ extern size_t
459459fsst_compressAVX512 (
460- SymbolTable &symbolTable,
460+ SymbolTable &symbolTable, // symbol table to compress with (taken by non-const reference)
461461 u8 * codeBase, // IN: base address for codes, i.e. compression output (points to simdbuf+256KB)
462462 u8 * symbolBase, // IN: base address for string bytes, i.e. compression input (points to simdbuf)
463463 SIMDjob* input, // IN: input array (size n) with job information: what to encode, where to store it.
464464 SIMDjob* output, // OUT: output array (size n) with job information: how much got encoded, end output pointer.
465465 size_t n, // IN: size of arrays input and output (should be max 512)
466466 size_t unroll); // IN: degree of SIMD unrolling
467467
468+ // Symbol manipulation
// declaration only; presumably returns the symbol made of the bytes of a followed by those of b -- confirm against the definition
469+ Symbol concat (Symbol a, Symbol b);
470+
468471// C++ fsst-compress function with some more control of how the compression happens (algorithm flavor, simd unroll degree)
// NOTE(review): only the declaration is visible here. Judging by the parameter names (to be confirmed against the definition):
//   n, lenIn[] and strIn[] describe the input strings; size and output describe the output buffer;
//   lenOut and strOut[] receive the per-string results; noSuffixOpt and avoidBranch select the algorithm flavor;
//   simd is the unroll degree mentioned above. The meaning of the size_t return value is not stated here.
469472size_t compressImpl (Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], bool noSuffixOpt, bool avoidBranch, int simd);
// NOTE(review): declaration only; takes the compressImpl parameter list without noSuffixOpt/avoidBranch,
// so presumably it chooses those two settings itself -- confirm against the definition
470473size_t compressAuto (Encoder *encoder, size_t n, size_t lenIn[], u8 *strIn[], size_t size, u8 * output, size_t *lenOut, u8 *strOut[], int simd);
0 commit comments