Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .changeset/fast-expressions-skip-scans.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
"htmljs-parser": patch
---

Speed up expression parsing by skipping work that provably cannot match. An
identifier/number character is never whitespace, never a terminator (no
`shouldTerminate` implementation matches a word character), and is not handled
by the expression switch, so it now takes a fast path that just advances the
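position. The unary and binary operator keyword scans also bail out immediately
when the adjacent character is not a lowercase letter: every unary keyword ends
with one and every binary keyword starts with one, so no keyword can match
otherwise. This improves steady-state parsing throughput with no behavior change.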
19 changes: 19 additions & 0 deletions src/states/EXPRESSION.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ export const EXPRESSION: StateDefinition<ExpressionMeta> = {
continue;
}

// Fast path: an identifier/number character is never whitespace, never
// a terminator (no `shouldTerminate` implementation matches a word
// character), and is not handled by the switch below, so it just
// advances. Short-circuiting here skips the termination checks and the
// switch dispatch for the bulk of expression content.
if (isWordCode(code)) {
this.pos++;
continue;
}

// Termination checks (no groupStack)
if (!expression.groupStack.length) {
if (expression.terminatedByWhitespace && isWhitespaceCode(code)) {
Expand Down Expand Up @@ -461,6 +471,10 @@ function lookBehindForOperator(
}

default: {
// Every unary keyword ends in a lowercase letter; if the character
// before `pos` is not one, no keyword can match.
if (code < CODE.LOWER_A || code > CODE.LOWER_Z) return -1;

for (const keyword of expression.inType
? tsUnaryKeywords
: unaryKeywords) {
Expand Down Expand Up @@ -510,6 +524,11 @@ function lookAheadForOperator(
}

default: {
// Every binary keyword starts with a lowercase letter; if the character
// at `pos` is not one, no keyword can match.
const startCode = data.charCodeAt(pos);
if (startCode < CODE.LOWER_A || startCode > CODE.LOWER_Z) return -1;

for (const keyword of binaryKeywords) {
const keywordPos = lookAheadFor(data, pos, keyword);
if (keywordPos === -1) continue;
Expand Down
Loading