keymanapp · jahorton · May 27, 2026
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/token-result-mapping.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/token-result-mapping.ts
@@ -75,6 +75,10 @@ export class TokenResultMapping implements CorrectionResultMapping<SearchNode>,
     return this.node;
   }
 
+  get inputCount(): number { // Forwards the `inputCount` reported by this mapping's `matchingSpace`.
+    return this.matchingSpace.inputCount;
+  }
+
   get inputSequence(): ProbabilityMass<Transform>[] {
     return this.node.priorInput;
   }

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-corrector.ts
@@ -34,6 +34,7 @@ import { MAX_EDIT_THRESHOLD_FACTOR } from "./search-quotient-spur.js";
 export type TokenResult = {
   matchString: string,
   inputSamplingCost: number,
+  inputCount: number, // values <= 1 make buildCorrectionSequence apply ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT
   knownCost: number,
   totalCost: number
 }
@@ -196,6 +197,7 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
       this._generatedTokenResults.set(uncorrectable.spaceId, {
         matchString: lockedResult.text,
         inputSamplingCost: -Math.log(lockedResult.p),
+        inputCount: uncorrectable.inputCount,
         knownCost: 0,
         totalCost: -Math.log(lockedResult.p)
       });
@@ -298,6 +300,7 @@ export class TokenizationCorrector implements CorrectionSearchable<ReadonlyArray
         this._generatedTokenResults.set(correctableToUpdate.spaceId, {
           matchString: lockedResult.text,
           inputSamplingCost: -Math.log(lockedResult.p),
+          inputCount: correctableToUpdate.inputCount,
           knownCost: MAX_EDIT_THRESHOLD_FACTOR, // we'll use the same threshold at which further search is terminated.
           totalCost: -Math.log(lockedResult.p) + MAX_EDIT_THRESHOLD_FACTOR * EDIT_DISTANCE_COST_SCALE
         });

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -221,15 +221,19 @@ export function determineTraversallessCorrectionSequences(
     const match: TokenResult = {
       matchString: wordbreak(postContext),
       inputSamplingCost: -Math.log(correction.p),
+      inputCount: 2,
       knownCost: 0,
       totalCost: -Math.log(correction.p)
     };
 
-    const suggestionParams = buildCorrectionSequence(transitionEffects, context, match, 1);
-
     const tokenizationMapping = mapWhitespacedTokenization(tokenization.left.map((t) => { return {exampleInput: t.text, codepointLength: KMWString.length(t.text)} }), lexicalModel, correction.sample);
     const tokenizedCorrection = tokenizationMapping.tokenizedTransform;
     const tokenizedCorrectionEntries = [...tokenizedCorrection.values()];
+    if(tokenizedCorrection.size > 1 || correction.sample.insert.length >= match.matchString.length) {
+      match.inputCount = 1;
+    }
+
+    const suggestionParams = buildCorrectionSequence(transitionEffects, context, match);
 
     // IF:  array has multiple entries, then build the preservation-transform as below, including the deleteLeft.
     // If not, don't make one!
@@ -451,7 +455,6 @@ export function buildCorrectionSequence(
   transitionEffects: ReturnType<typeof determineSuggestionRange>,
   context: Context,
   match: Readonly<TokenResult>,
-  costFactor: number
 ) {
   const { deleteLeft } = transitionEffects;
 
@@ -463,6 +466,21 @@ export function buildCorrectionSequence(
     deleteLeft: 0,
   }
 
+  /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost
+   * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if
+   * there are significantly more likely words.  We only need this to allow very minor fat-finger
+   * adjustments for 100% keystroke-sequence corrections in order to prevent finickiness on
+   * key borders.
+   *
+   * Technically, the probabilities this produces won't be normalized as-is... but there's no
+   * true NEED to do so for it, even if it'd be 'nice to have'.  Consistently tracking when
+   * to apply it could become tricky, so it's simpler to leave out.
+   *
+   * Worst-case, it's possible to temporarily add normalization if a code deep-dive
+   * is needed in the future.
+   */
+  const costFactor = (match.inputCount <= 1) ? ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT : 1;
+
   const rootCost = match.totalCost;
   const predictionRoot = {
     sample: correctionTransform,
@@ -492,13 +510,12 @@ export function buildCorrectionSequence(
 export function determineTokenizedCorrectionSequence(
   transition: ContextTransition,
   tokenization: ContextTokenization,
-  match: Readonly<TokenResult>,
-  costFactor: number
+  match: Readonly<TokenResult>
 ): PredictionParameters {
   const applicationTarget = transition.base.displayTokenization;
   const transitionParams = determineSuggestionRange(applicationTarget.tokens, tokenization.tokens, (a, b) => a.spaceId == b.spaceId);
 
-  const suggestionParams = buildCorrectionSequence(transitionParams, transition.base.context, match, costFactor);
+  const suggestionParams = buildCorrectionSequence(transitionParams, transition.base.context, match);
 
   // The correction should always be based on the most recent external
   // transform/transcription ID.
@@ -622,28 +639,13 @@ export async function correctAndEnumerate(
       continue;
     }
 
-    /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost
-     * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if
-     * there are significantly more likely words.  We only need this to allow very minor fat-finger
-     * adjustments for 100% keystroke-sequence corrections in order to prevent finickiness on
-     * key borders.
-     *
-     * Technically, the probabilities this produces won't be normalized as-is... but there's no
-     * true NEED to do so for it, even if it'd be 'nice to have'.  Consistently tracking when
-     * to apply it could become tricky, so it's simpler to leave out.
-     *
-     * Worst-case, it's possible to temporarily add normalization if a code deep-dive
-     * is needed in the future.
-     */
-    const costFactor = (tokenization.tail.inputCount <= 1) ? ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT : 1;
-
-    const predictionPrep = determineTokenizedCorrectionSequence(transition, tokenization, match, costFactor);
+    const predictionPrep = determineTokenizedCorrectionSequence(transition, tokenization, match);
     const predictions = predictFromCorrectionSequence(lexicalModel, predictionPrep.tokenizedCorrection, predictionPrep.rootContext, transition.transitionId);
     predictions.forEach((p) => predictionPrep.applyInPost(p));
 
     // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions.
-    if(predictions.length > 0 && bestCorrectionCost === undefined) {
-      bestCorrectionCost = match.totalCost * costFactor;
+    if(predictions.length > 0 && (bestCorrectionCost === undefined || bestCorrectionCost > match.totalCost)) {
+      bestCorrectionCost = match.totalCost;
     }
 
     // If we're getting the same prediction again, it's lower-cost.  Update!

diff --git a/...ve-text/worker-thread/prediction-helpers/determine-tokenized-correction-sequence.tests.ts b/...ve-text/worker-thread/prediction-helpers/determine-tokenized-correction-sequence.tests.ts
@@ -15,7 +15,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers';
 import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs';
 import { KMWString } from '@keymanapp/web-utils';
 
-import { determineTokenizedCorrectionSequence, models, ContextState, ContextToken, ContextTokenization, CorrectionPredictionTuple } from "@keymanapp/lm-worker/test-index";
+import { determineTokenizedCorrectionSequence, models, ContextState, ContextToken, ContextTokenization, CorrectionPredictionTuple, ModelCompositor } from "@keymanapp/lm-worker/test-index";
 
 import Context = LexicalModelTypes.Context;
 import ProbabilityMass = LexicalModelTypes.ProbabilityMass;
@@ -54,10 +54,10 @@ describe('determineTokenizedCorrectionSequence', () => {
       transition.final.displayTokenization, {
         matchString: 'fo',
         inputSamplingCost: -Math.log(trueInput.p),
+        inputCount: 2,
         knownCost: 0,
         totalCost: -Math.log(trueInput.p)
-      },
-      1
+      }
     );
 
     assert.deepEqual({...results.rootContext, casingForm: results.rootContext.casingForm}, {
@@ -104,10 +104,10 @@ describe('determineTokenizedCorrectionSequence', () => {
       transition.final.displayTokenization, {
         matchString: ' ',
         inputSamplingCost: -Math.log(trueInput.p),
+        inputCount: 1,
         knownCost: 0,
         totalCost: -Math.log(trueInput.p)
-      },
-      1
+      }
     );
 
     assert.deepEqual({...results.rootContext, casingForm: results.rootContext.casingForm}, {
@@ -118,15 +118,12 @@ describe('determineTokenizedCorrectionSequence', () => {
       endOfBuffer: true
     });
 
-    assert.deepEqual(results.tokenizedCorrection, [
-      {
-        sample: {
-          insert: ' ',
-          deleteLeft: 0
-        },
-        p: trueInput.p
-      }
-    ]);
+    assert.equal(results.tokenizedCorrection.length, 1);
+    assert.deepEqual(results.tokenizedCorrection[0].sample, {
+      insert: ' ',
+      deleteLeft: 0
+    });
+    assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000);
   });
 
   it(`properly analyzes common-case word-start - beginning a new token`, () => {
@@ -154,10 +151,10 @@ describe('determineTokenizedCorrectionSequence', () => {
       transition.final.displayTokenization, {
         matchString: 'f',
         inputSamplingCost: -Math.log(trueInput.p),
+        inputCount: 1,
         knownCost: 0,
         totalCost: -Math.log(trueInput.p)
-      },
-      1
+      }
     );
 
     assert.deepEqual({...results.rootContext, casingForm: results.rootContext.casingForm}, {
@@ -168,15 +165,13 @@ describe('determineTokenizedCorrectionSequence', () => {
       endOfBuffer: true
     });
 
-    assert.deepEqual(results.tokenizedCorrection, [
-      {
-        sample: {
-          insert: 'f',
-          deleteLeft: 0
-        },
-        p: trueInput.p
-      }
-    ]);
+
+    assert.equal(results.tokenizedCorrection.length, 1);
+    assert.deepEqual(results.tokenizedCorrection[0].sample, {
+      insert: 'f',
+      deleteLeft: 0
+    });
+    assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000);
   });
 
   it(`properly analyzes post-merge case`, () => {
@@ -210,10 +205,10 @@ describe('determineTokenizedCorrectionSequence', () => {
       transition.final.displayTokenization, {
         matchString: 'can\'t',
         inputSamplingCost: -Math.log(trueInput.p),
+        inputCount: 5,
         knownCost: 0,
         totalCost: -Math.log(trueInput.p)
-      },
-      1
+      }
     );
 
     assert.deepEqual({...results.rootContext, casingForm: results.rootContext.casingForm}, {
@@ -261,10 +256,10 @@ describe('determineTokenizedCorrectionSequence', () => {
       transition.final.displayTokenization, {
         matchString: ' ',
         inputSamplingCost: -Math.log(trueInput.p),
+        inputCount: 1,
         knownCost: 0,
         totalCost: -Math.log(trueInput.p)
-      },
-      1
+      }
     );
 
     assert.deepEqual({...results.rootContext, casingForm: results.rootContext.casingForm}, {
@@ -275,15 +270,13 @@ describe('determineTokenizedCorrectionSequence', () => {
       endOfBuffer: true
     });
 
-    assert.deepEqual(results.tokenizedCorrection, [
-      {
-        sample: {
-          insert: ' ',
-          deleteLeft: 0
-        },
-        p: trueInput.p
-      }
-    ]);
+
+    assert.equal(results.tokenizedCorrection.length, 1);
+    assert.deepEqual(results.tokenizedCorrection[0].sample, {
+      insert: ' ',
+      deleteLeft: 0
+    });
+    assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000);
   });
 
   it(`properly analyzes complex transition - multi-token replacement`, () => {
@@ -310,10 +303,10 @@ describe('determineTokenizedCorrectionSequence', () => {
       transition.final.displayTokenization, {
         matchString: 'd',
         inputSamplingCost: -Math.log(trueInput.p),
+        inputCount: 1,
         knownCost: 0,
         totalCost: -Math.log(trueInput.p)
-      },
-      1
+      }
     );
 
     // Large-scale deletions will receive enhanced handling soon.  But, for now, it's
@@ -326,15 +319,13 @@ describe('determineTokenizedCorrectionSequence', () => {
       endOfBuffer: true
     });
 
-    assert.deepEqual(results.tokenizedCorrection, [
-      {
-        sample: {
-          insert: 'd',
-          deleteLeft: 0
-        },
-        p: trueInput.p
-      }
-    ]);
+
+    assert.equal(results.tokenizedCorrection.length, 1);
+    assert.deepEqual(results.tokenizedCorrection[0].sample, {
+      insert: 'd',
+      deleteLeft: 0
+    });
+    assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000);
 
     const dummiedTuple: CorrectionPredictionTuple = {
       prediction: {