Skip to content

Commit 2b6efcb

Browse files
Merge pull request #1204 from heshpdx:master
PiperOrigin-RevId: 697922880
2 parents f25050c + 8c6d25f commit 2b6efcb

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

c/enc/block_splitter_inc.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
118118
size_t insert_cost_ix = symbol * num_histograms;
119119
double min_cost = 1e99;
120120
double block_switch_cost = block_switch_bitcost;
121+
static const size_t prologue_length = 2000;
122+
static const double multiplier = 0.07 / 2000;
121123
size_t k;
122124
for (k = 0; k < num_histograms; ++k) {
123125
/* We are coding the symbol with entropy code k. */
@@ -128,8 +130,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
128130
}
129131
}
130132
/* More blocks for the beginning. */
131-
if (byte_ix < 2000) {
132-
block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
133+
if (byte_ix < prologue_length) {
134+
block_switch_cost *= 0.77 + multiplier * (double)byte_ix;
133135
}
134136
for (k = 0; k < num_histograms; ++k) {
135137
cost[k] -= min_cost;

c/enc/encode.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -437,9 +437,10 @@ static BROTLI_BOOL ShouldCompress(
437437
if ((double)num_literals > 0.99 * (double)bytes) {
438438
uint32_t literal_histo[256] = { 0 };
439439
static const uint32_t kSampleRate = 13;
440+
static const double kInvSampleRate = 1.0 / 13.0;
440441
static const double kMinEntropy = 7.92;
441442
const double bit_cost_threshold =
442-
(double)bytes * kMinEntropy / kSampleRate;
443+
(double)bytes * kMinEntropy * kInvSampleRate;
443444
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
444445
uint32_t pos = (uint32_t)last_flush_pos;
445446
size_t i;

c/enc/literal_cost.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
106106
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
107107
size_t masked_pos = (pos + i) & mask;
108108
size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
109+
static const size_t prologue_length = 2000;
110+
static const double multiplier = 0.35 / 2000;
109111
double lit_cost;
110112
if (histo == 0) {
111113
histo = 1;
@@ -120,8 +122,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
120122
Perhaps because the entropy source is changing its properties
121123
rapidly in the beginning of the file, perhaps because the beginning
122124
of the data is a statistical "anomaly". */
123-
if (i < 2000) {
124-
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
125+
if (i < prologue_length) {
126+
lit_cost += 0.35 + multiplier * (double)i;
125127
}
126128
cost[i] = (float)lit_cost;
127129
}

0 commit comments

Comments
 (0)