Skip to content

Commit 3196f61

Browse files
committed
make tryMerge and friends close to safe
1 parent af5a4f3 commit 3196f61

File tree

1 file changed

+83
-110
lines changed

1 file changed

+83
-110
lines changed

lib/dictBuilder/zdict.rs

Lines changed: 83 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use libc::{free, malloc, memcpy, size_t};
55
use crate::lib::common::bits::{ZSTD_NbCommonBytes, ZSTD_highbit32};
66
use crate::lib::common::error_private::{ERR_getErrorName, ERR_isError, Error};
77
use crate::lib::common::huf::{HUF_CElt, HUF_CTABLE_WORKSPACE_SIZE_U32, HUF_WORKSPACE_SIZE};
8-
use crate::lib::common::mem::{MEM_read64, MEM_readLE32, MEM_readST, MEM_writeLE32};
8+
use crate::lib::common::mem::{MEM_readLE32, MEM_readST, MEM_writeLE32};
99
use crate::lib::common::xxhash::ZSTD_XXH64;
1010
use crate::lib::common::zstd_internal::{
1111
repStartValue, LLFSELog, MLFSELog, MaxLL, MaxML, OffFSELog, ZSTD_REP_NUM,
@@ -391,142 +391,115 @@ unsafe fn ZDICT_analyzePos(
391391
solution
392392
}
393393

/// Returns `true` when the first `length` bytes of `ip` are equal to the first
/// `length` bytes of `into`.
///
/// If either slice holds fewer than `length` bytes the prefixes cannot be
/// compared, so the function returns `false` rather than panicking on an
/// out-of-range slice.
fn isIncluded(ip: &[u8], into: &[u8], length: size_t) -> bool {
    match (ip.get(..length), into.get(..length)) {
        (Some(head), Some(candidate)) => head == candidate,
        // At least one input is shorter than `length`: not included.
        _ => false,
    }
}
407397

408-
unsafe fn ZDICT_tryMerge(
409-
table: *mut DictItem,
398+
fn ZDICT_tryMerge(
399+
table: &mut [DictItem],
410400
mut elt: DictItem,
411401
eltNbToSkip: u32,
412-
buffer: *const core::ffi::c_void,
402+
buffer: &[u8],
413403
) -> u32 {
414-
let tableSize = (*table).pos;
404+
let tableSize = table[0].pos;
415405
let eltEnd = (elt.pos).wrapping_add(elt.length);
416-
let buf = buffer as *const core::ffi::c_char;
417-
let mut u: u32 = 0;
418-
u = 1;
419-
while u < tableSize {
420-
if (u != eltNbToSkip)
421-
&& (*table.offset(u as isize)).pos > elt.pos
422-
&& (*table.offset(u as isize)).pos <= eltEnd
406+
let buf = buffer;
407+
408+
/* tail overlap */
409+
let mut u = 1usize;
410+
while u < tableSize as usize {
411+
if (u as u32 != eltNbToSkip)
412+
&& table[u as usize].pos > elt.pos
413+
&& table[u as usize].pos <= eltEnd
423414
{
424-
let addedLength = ((*table.offset(u as isize)).pos).wrapping_sub(elt.pos);
425-
let fresh2 = &mut (*table.offset(u as isize)).length;
426-
*fresh2 = (*fresh2).wrapping_add(addedLength);
427-
(*table.offset(u as isize)).pos = elt.pos;
428-
let fresh3 = &mut (*table.offset(u as isize)).savings;
429-
*fresh3 = (*fresh3).wrapping_add(elt.savings * addedLength / elt.length);
430-
let fresh4 = &mut (*table.offset(u as isize)).savings;
431-
*fresh4 = (*fresh4).wrapping_add(elt.length / 8);
432-
elt = *table.offset(u as isize);
433-
while u > 1 && (*table.offset(u.wrapping_sub(1) as isize)).savings < elt.savings {
434-
*table.offset(u as isize) = *table.offset(u.wrapping_sub(1) as isize);
435-
u = u.wrapping_sub(1);
415+
/* append */
416+
let addedLength = table[u].pos - elt.pos;
417+
table[u].length += addedLength;
418+
table[u].pos = elt.pos;
419+
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
420+
table[u].savings += elt.length / 8; /* rough approx bonus */
421+
elt = table[u];
422+
/* sort : improve rank */
423+
while (u > 1) && (table[u - 1].savings < elt.savings) {
424+
table[u] = table[u - 1];
425+
u -= 1;
436426
}
437-
*table.offset(u as isize) = elt;
438-
return u;
427+
table[u] = elt;
428+
return u as u32;
439429
}
440430
u = u.wrapping_add(1);
441431
}
442-
u = 1;
443-
while u < tableSize {
444-
if u != eltNbToSkip {
445-
if ((*table.offset(u as isize)).pos).wrapping_add((*table.offset(u as isize)).length)
446-
>= elt.pos
447-
&& (*table.offset(u as isize)).pos < elt.pos
448-
{
449-
let addedLength_0 = eltEnd as core::ffi::c_int
450-
- ((*table.offset(u as isize)).pos)
451-
.wrapping_add((*table.offset(u as isize)).length)
452-
as core::ffi::c_int;
453-
let fresh5 = &mut (*table.offset(u as isize)).savings;
454-
*fresh5 = (*fresh5).wrapping_add(elt.length / 8);
455-
if addedLength_0 > 0 {
456-
let fresh6 = &mut (*table.offset(u as isize)).length;
457-
*fresh6 = (*fresh6 as core::ffi::c_uint)
458-
.wrapping_add(addedLength_0 as core::ffi::c_uint);
459-
let fresh7 = &mut (*table.offset(u as isize)).savings;
460-
*fresh7 = (*fresh7 as core::ffi::c_uint).wrapping_add(
461-
(elt.savings)
462-
.wrapping_mul(addedLength_0 as core::ffi::c_uint)
463-
.wrapping_div(elt.length),
464-
);
465-
}
466-
elt = *table.offset(u as isize);
467-
while u > 1 && (*table.offset(u.wrapping_sub(1) as isize)).savings < elt.savings {
468-
*table.offset(u as isize) = *table.offset(u.wrapping_sub(1) as isize);
469-
u = u.wrapping_sub(1);
470-
}
471-
*table.offset(u as isize) = elt;
472-
return u;
432+
433+
/* front overlap */
434+
let mut u = 1usize;
435+
while u < tableSize as usize {
436+
if u == eltNbToSkip as usize {
437+
u = u.wrapping_add(1);
438+
continue;
439+
}
440+
441+
/* overlap, existing < new */
442+
if (table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos) {
443+
/* append */
444+
let addedLength = eltEnd as i32 - (table[u].pos + table[u].length) as i32; /* note: can be negative */
445+
table[u].savings += elt.length / 8; /* rough approx bonus */
446+
if addedLength > 0 {
447+
/* otherwise, elt fully included into existing */
448+
table[u].length += addedLength.unsigned_abs();
449+
/* rough approx */
450+
table[u].savings += elt.savings * addedLength.unsigned_abs() / elt.length;
473451
}
474-
if MEM_read64(
475-
buf.offset((*table.offset(u as isize)).pos as isize) as *const core::ffi::c_void
476-
) == MEM_read64(buf.offset(elt.pos as isize).add(1) as *const core::ffi::c_void)
477-
&& isIncluded(
478-
buf.offset((*table.offset(u as isize)).pos as isize),
479-
buf.offset(elt.pos as isize).add(1),
480-
(*table.offset(u as isize)).length as size_t,
481-
)
482-
{
483-
let addedLength_1 = Ord::max(
484-
(elt.length).wrapping_sub((*table.offset(u as isize)).length),
485-
1,
486-
) as size_t;
487-
(*table.offset(u as isize)).pos = elt.pos;
488-
let fresh8 = &mut (*table.offset(u as isize)).savings;
489-
*fresh8 = (*fresh8).wrapping_add(
490-
(elt.savings as size_t * addedLength_1 / elt.length as size_t) as u32,
491-
);
492-
(*table.offset(u as isize)).length = Ord::min(
493-
elt.length,
494-
((*table.offset(u as isize)).length).wrapping_add(1),
495-
);
496-
return u;
452+
/* sort : improve rank */
453+
elt = table[u];
454+
while (u > 1) && (table[u - 1].savings < elt.savings) {
455+
table[u] = table[u - 1];
456+
u -= 1;
497457
}
458+
table[u] = elt;
459+
return u as u32;
498460
}
461+
462+
if buf[table[u].pos as usize..][..8] == buf[elt.pos as usize + 1..][..8] {
463+
if isIncluded(
464+
&buf[table[u].pos as usize..],
465+
&buf[elt.pos as usize + 1..],
466+
table[u].length as usize,
467+
) {
468+
let addedLength = elt.length.checked_sub(table[u].length).unwrap_or(1);
469+
table[u].pos = elt.pos;
470+
table[u].savings += elt.savings * addedLength / elt.length;
471+
table[u].length = Ord::min(elt.length, table[u].length + 1);
472+
return u as u32;
473+
}
474+
}
475+
499476
u = u.wrapping_add(1);
500477
}
478+
501479
0
502480
}
503481

504-
unsafe fn ZDICT_removeDictItem(table: *mut DictItem, id: u32) {
482+
fn ZDICT_removeDictItem(table: &mut [DictItem], id: u32) {
505483
debug_assert_ne!(id, 0);
506484
if id == 0 {
507485
return; // protection, should never happen
508486
}
509-
let max = (*table).pos as isize; // convention: table[0].pos stores the number of elements
510-
for u in id as isize..max.wrapping_sub(1) {
511-
*table.offset(u) = *table.offset(u.wrapping_add(1));
487+
let max = table[0].pos as usize; // convention: table[0].pos stores the number of elements
488+
for u in id as usize..max.wrapping_sub(1) {
489+
table[u] = table[u + 1];
512490
}
513-
(*table).pos = ((*table).pos).wrapping_sub(1);
491+
table[0].pos -= 1;
514492
}
515493

516-
unsafe fn ZDICT_insertDictItem(
517-
table: &mut [DictItem],
518-
elt: DictItem,
519-
buffer: *const core::ffi::c_void,
520-
) {
494+
fn ZDICT_insertDictItem(table: &mut [DictItem], elt: DictItem, buffer: &[u8]) {
521495
let maxSize = table.len() as u32;
522-
let table = table.as_mut_ptr();
523496

524497
// merge if possible
525498
let mut mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
526499
if mergeId != 0 {
527500
let mut newMerge = 1;
528501
while newMerge != 0 {
529-
newMerge = ZDICT_tryMerge(table, *table.offset(mergeId as isize), mergeId, buffer);
502+
newMerge = ZDICT_tryMerge(table, table[mergeId as usize], mergeId, buffer);
530503
if newMerge != 0 {
531504
ZDICT_removeDictItem(table, mergeId);
532505
}
@@ -537,17 +510,17 @@ unsafe fn ZDICT_insertDictItem(
537510

538511
// insert
539512
let mut current: u32 = 0;
540-
let mut nextElt = (*table).pos;
513+
let mut nextElt = table[0].pos;
541514
if nextElt >= maxSize {
542515
nextElt = maxSize.wrapping_sub(1);
543516
}
544517
current = nextElt.wrapping_sub(1);
545-
while (*table.offset(current as isize)).savings < elt.savings {
546-
*table.offset(current.wrapping_add(1) as isize) = *table.offset(current as isize);
518+
while (table[current as usize]).savings < elt.savings {
519+
table[current.wrapping_add(1) as usize] = table[current as usize];
547520
current = current.wrapping_sub(1);
548521
}
549-
*table.offset(current.wrapping_add(1) as isize) = elt;
550-
(*table).pos = nextElt.wrapping_add(1);
522+
table[current as usize + 1] = elt;
523+
table[0].pos = nextElt.wrapping_add(1);
551524
}
552525

553526
unsafe fn ZDICT_dictSize(dictList: &[DictItem]) -> u32 {
@@ -655,7 +628,7 @@ unsafe fn ZDICT_trainBuffer_legacy(
655628
continue;
656629
}
657630

658-
ZDICT_insertDictItem(dictList, solution, buffer.as_ptr().cast());
631+
ZDICT_insertDictItem(dictList, solution, buffer);
659632
cursor += solution.length as usize;
660633

661634
if notificationLevel >= 2 && displayClock.elapsed() > refresh_rate {

0 commit comments

Comments
 (0)