Skip to content

Commit 6649523

Browse files
TocarIP and copybara-github
authored and committed
Make Brotli decompression faster
Makes it ~8% faster on my Skylake desktop. PiperOrigin-RevId: 689499172
1 parent 350100a commit 6649523

File tree

1 file changed

+56
-19
lines changed

1 file changed

+56
-19
lines changed

c/dec/decode.c

Lines changed: 56 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2006,35 +2006,72 @@ static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
20062006
brotli_reg_t bits;
20072007
brotli_reg_t value;
20082008
PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
2009-
do {
2010-
if (!CheckInputAmount(safe, br)) {
2011-
s->state = BROTLI_STATE_COMMAND_INNER;
2012-
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
2013-
goto saveStateAndReturn;
2009+
if (!safe) {
2010+
// This is the hottest part of the decode, so we copy the loop below
2011+
// and optimize it by calculating the number of steps where all checks
2012+
// evaluate to false (ringbuffer size/block size/input size).
2013+
// Since all checks are loop invariant, we just need to find
2014+
// minimal number of iterations for a simple loop, and run
2015+
// the full version for the remainder.
2016+
int num_steps = i - 1;
2017+
if (num_steps > 0 && ((brotli_reg_t)(num_steps) > s->block_length[0])) {
2018+
// Safe cast, since block_length < steps
2019+
num_steps = (int)s->block_length[0];
20142020
}
2015-
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
2016-
goto NextLiteralBlock;
2021+
if (s->ringbuffer_size >= pos &&
2022+
(s->ringbuffer_size - pos) <= num_steps) {
2023+
num_steps = s->ringbuffer_size - pos - 1;
20172024
}
2018-
if (!safe) {
2025+
if (num_steps < 0) {
2026+
num_steps = 0;
2027+
}
2028+
num_steps = BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits,
2029+
&value, s->ringbuffer, pos,
2030+
num_steps);
2031+
pos += num_steps;
2032+
s->block_length[0] -= (brotli_reg_t)num_steps;
2033+
i -= num_steps;
2034+
do {
2035+
if (!CheckInputAmount(safe, br)) {
2036+
s->state = BROTLI_STATE_COMMAND_INNER;
2037+
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
2038+
goto saveStateAndReturn;
2039+
}
2040+
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
2041+
goto NextLiteralBlock;
2042+
}
20192043
BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, &value,
20202044
s->ringbuffer, pos, 1);
2021-
} else {
2045+
--s->block_length[0];
2046+
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
2047+
++pos;
2048+
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
2049+
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
2050+
--i;
2051+
goto saveStateAndReturn;
2052+
}
2053+
} while (--i != 0);
2054+
} else { /* safe */
2055+
do {
2056+
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
2057+
goto NextLiteralBlock;
2058+
}
20222059
brotli_reg_t literal;
20232060
if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
20242061
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
20252062
goto saveStateAndReturn;
20262063
}
20272064
s->ringbuffer[pos] = (uint8_t)literal;
2028-
}
2029-
--s->block_length[0];
2030-
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
2031-
++pos;
2032-
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
2033-
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
2034-
--i;
2035-
goto saveStateAndReturn;
2036-
}
2037-
} while (--i != 0);
2065+
--s->block_length[0];
2066+
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
2067+
++pos;
2068+
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
2069+
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
2070+
--i;
2071+
goto saveStateAndReturn;
2072+
}
2073+
} while (--i != 0);
2074+
}
20382075
} else {
20392076
uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
20402077
uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];

0 commit comments

Comments
 (0)