Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,42 @@ function textToCharTransforms(text: string, transformId?: number): Transform[] {
[...text].map(insert => ({insert, deleteLeft: 0}));
}


/**
 * A reduced, ContextToken-style contract.  It is intended for handling cases
 * that should not be considered correctable.
 */
export interface ContextTokenLike {
  /**
   * Text corresponding to the net effects of the most likely inputs received
   * that can correspond to the represented token.
   */
  exampleInput: string;

  /**
   * Length, in codepoints, of the corrected text represented by the current
   * token.
   */
  codepointLength: number;

  /**
   * Whether or not the token is likely still being edited by the user (due to
   * adjacency of the caret).
   */
  isPartial?: boolean;

  /**
   * A compact string-based representation of `inputRange` that maps compatible
   * token source ranges to each other.
   */
  sourceRangeKey?: string;
}

/**
* Represents cached data about one token (either a word or a unit of whitespace)
* in the context and associated correction-search progress and results.
*/
export class ContextToken {
export class ContextToken implements ContextTokenLike {
/**
* Indicates whether or not the token is considered whitespace.
*/
Expand All @@ -54,6 +85,10 @@ export class ContextToken {
}
private _searchModule: SearchQuotientNode;

/**
* Whether or not the token is likely still being edited by the user (due to
* adjacency of the caret)
*/
isPartial: boolean;

/**
Expand Down Expand Up @@ -118,6 +153,14 @@ export class ContextToken {
return new ContextToken(searchModule, isPartial);
}

/**
* Reports the length in codepoints of corrected text represented by the
* current token.
*
* The value is read directly from the token's underlying search module.
*/
get codepointLength() {
return this._searchModule.codepointLength;
}

/**
* Reports the `inputCount` value of the token's underlying search module.
*/
get inputCount() {
return this._searchModule.inputCount;
}
Expand Down Expand Up @@ -155,7 +198,7 @@ export class ContextToken {

/**
* Generates text corresponding to the net effects of the most likely inputs
* received that can correspond to the current instance.
* received that can correspond to the represented token.
*/
get exampleInput(): string {
return this.searchModule.bestExample.text;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbre

import TransformUtils from './transformUtils.js';
import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
import { ContextTokenLike } from './correction/context-token.js';
import { ContextTokenization } from './correction/context-tokenization.js';
import { ContextTracker } from './correction/context-tracker.js';
import { ContextToken } from './correction/context-token.js';
import { ContextState, determineContextSlideTransform } from './correction/context-state.js';
import { ContextTransition } from './correction/context-transition.js';
import { ExecutionTimer } from './correction/execution-timer.js';
Expand Down Expand Up @@ -75,6 +75,43 @@ export const CORRECTION_SEARCH_THRESHOLDS = {
REPLACEMENT_SEARCH_THRESHOLD: 4 as const // e^-4 = 0.0183156388. Allows "80%" of an extra edit.
}

/**
 * Describes the minimum replacement range, and its effects, that suggestions
 * require.
 *
 * The values derive from properties of the transition from the base
 * context-tokenization to the target tokenization (and the context variant it
 * represents).
 */
export interface SuggestionReplacement<T extends ContextTokenLike> {
  /**
   * Tokens of the base context-tokenization that are lost in the target
   * context-tokenization due to the transition event.
   *
   * Applying a Suggestion implicitly replaces these.
   */
  tokensToRemove: T[];

  /**
   * Tokens added (after the removed tokens) to the base context-tokenization
   * in order to produce the target context-tokenization.
   *
   * These are the "new" tokens generated by the transition, so Suggestions
   * should represent corrections and predictions rooted upon them.
   */
  tokensToPredict: T[];

  /**
   * The total range of left-deletion needed when applying suggestions.
   */
  deleteLeft: number;

  /**
   * The id of the underlying context transition.
   */
  transitionId?: number;
}

/**
* Collates information related to suggestions during the suggestion generation
* process.
Expand Down Expand Up @@ -397,53 +434,67 @@ export function determineSuggestionAlignment(
* @param variantForSuggestions
* @returns
*/
export function determineSuggestionRange<T extends ContextTokenLike>(
  userContextTokenization: T[],
  variantForSuggestions: T[],
  equalityChecker: (a: T, b: T) => boolean
): SuggestionReplacement<T> {
  // Compares the two token sequences from their heads, finds the run of
  // leading tokens that `equalityChecker` considers equal, and reports
  // whatever follows that run in each sequence:
  //  - tokensToRemove:  the remainder of `userContextTokenization`
  //  - tokensToPredict: the remainder of `variantForSuggestions`
  //  - deleteLeft:      codepoint count derived from `tokensToRemove`
  //
  // Neither input array is mutated; all work is done on shallow copies.
  //
  // Throws if the two sequences are related but neither one's head token sits
  // at the head of the other, and also if both remainders are empty while one
  // of the sequences has no tokens at all.

  // Null/undefined-safe wrapper around the caller's checker; this lets the
  // scan below run off the end of either array without special-casing.
  const tokensMatch = (a: T | undefined, b: T | undefined): boolean => {
    if(!a || !b) {
      return false;
    }

    return equalityChecker(a, b);
  };

  const deleteLeftCalc = (tokenSet: T[], predictCount: number): number => {
    // TODO: once we start activating multi-tokenization for real, only the
    // 'reduce' component should remain.
    //
    // For now: when more than one token is being predicted, only the final
    // removed token's length counts; otherwise all removed tokens are summed.
    return (predictCount > 1)
      ? (tokenSet[tokenSet.length - 1]?.codepointLength ?? 0)
      : tokenSet.reduce((prev, curr) => prev + curr.codepointLength, 0);
  };

  const tokenSetA = userContextTokenization.slice();
  const tokenSetB = variantForSuggestions.slice();

  // Where (if anywhere) does each sequence's head token appear in the other?
  const aHeadIndexInB = tokenSetB.findIndex((t) => tokensMatch(t, tokenSetA[0]));
  const bHeadIndexInA = tokenSetA.findIndex((t) => tokensMatch(t, tokenSetB[0]));

  if(aHeadIndexInB === -1 && bHeadIndexInA === -1) {
    // Both are full replacements.
    return {
      tokensToRemove: tokenSetA,
      tokensToPredict: tokenSetB,
      deleteLeft: deleteLeftCalc(tokenSetA, tokenSetB.length)
    };
  } else if(aHeadIndexInB !== 0 && bHeadIndexInA !== 0) {
    throw new Error("Leading edge of context should not differ in both tokenizations.");
  }

  // Walk forward in lockstep while the aligned tokens still match.
  let tailOffset = 0;
  while(tokensMatch(tokenSetA[bHeadIndexInA + tailOffset], tokenSetB[aHeadIndexInB + tailOffset])) {
    tailOffset++;
  }

  const tokensToRemove: T[] = tokenSetA.slice(bHeadIndexInA + tailOffset);
  const tokensToPredict: T[] = tokenSetB.slice(aHeadIndexInB + tailOffset);

  // Can occur when backspacing to the end of a previous word.
  if(tokensToPredict.length === 0) {
    if(tokenSetA.length === 0 || tokenSetB.length === 0) {
      throw new Error("Invalid state - a tokenization is missing expected tokens");
    }
    // Pull the last matched token back into both results so that there is
    // always at least one token to root predictions upon.
    tokensToRemove.unshift(tokenSetA[bHeadIndexInA + tailOffset - 1]);
    tokensToPredict.unshift(tokenSetB[aHeadIndexInB + tailOffset - 1]);
  }

  return {
    tokensToRemove,
    tokensToPredict,
    deleteLeft: deleteLeftCalc(tokensToRemove, tokensToPredict.length)
  };
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,14 @@ function buildQuickBrownFixture() {
};
}

// Equality predicate handed to determineSuggestionRange by the tests below:
// two tokens are treated as the same when their `spaceId` values match.
const tokenEquality = (a: ContextToken, b: ContextToken) => {
  const leftId = a.spaceId;
  const rightId = b.spaceId;
  return leftId == rightId;
};

describe('determineSuggestionRange', () => {
it('adjusts the final token if no tokenization changes occur', () => {
const fixture = buildQuickBrownFixture();
const noChange = fixture.variations.noChange;

const analysis = determineSuggestionRange(fixture.baseTokenization, noChange.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, noChange.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, noChange.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, noChange.range.tokensToPredict);
Expand All @@ -185,7 +187,7 @@ describe('determineSuggestionRange', () => {
const fixture = buildQuickBrownFixture();
const plainInsert = fixture.variations.plainInsert;

const analysis = determineSuggestionRange(fixture.baseTokenization, plainInsert.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, plainInsert.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, plainInsert.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, plainInsert.range.tokensToPredict);
Expand All @@ -195,7 +197,7 @@ describe('determineSuggestionRange', () => {
const fixture = buildQuickBrownFixture();
const newTokenInsert = fixture.variations.newTokenInsert;

const analysis = determineSuggestionRange(fixture.baseTokenization, newTokenInsert.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, newTokenInsert.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, newTokenInsert.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, newTokenInsert.range.tokensToPredict);
Expand All @@ -205,7 +207,7 @@ describe('determineSuggestionRange', () => {
const fixture = buildQuickBrownFixture();
const charReplace = fixture.variations.charReplace;

const analysis = determineSuggestionRange(fixture.baseTokenization, charReplace.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, charReplace.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, charReplace.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, charReplace.range.tokensToPredict);
Expand All @@ -215,7 +217,7 @@ describe('determineSuggestionRange', () => {
const fixture = buildQuickBrownFixture();
const del5Insert5 = fixture.variations.del5Insert5;

const analysis = determineSuggestionRange(fixture.baseTokenization, del5Insert5.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, del5Insert5.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, del5Insert5.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, del5Insert5.range.tokensToPredict);
Expand All @@ -225,7 +227,7 @@ describe('determineSuggestionRange', () => {
const fixture = buildQuickBrownFixture();
const eraseToken = fixture.variations.eraseToken;

const analysis = determineSuggestionRange(fixture.baseTokenization, eraseToken.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, eraseToken.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, eraseToken.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, eraseToken.range.tokensToPredict);
Expand All @@ -235,7 +237,7 @@ describe('determineSuggestionRange', () => {
const fixture = buildQuickBrownFixture();
const deleteToBound = fixture.variations.deleteToBound;

const analysis = determineSuggestionRange(fixture.baseTokenization, deleteToBound.tokenization);
const analysis = determineSuggestionRange(fixture.baseTokenization.tokens, deleteToBound.tokenization.tokens, tokenEquality);

assert.sameOrderedMembers(analysis.tokensToRemove, deleteToBound.range.tokensToRemove);
assert.sameOrderedMembers(analysis.tokensToPredict, deleteToBound.range.tokensToPredict);
Expand All @@ -255,7 +257,7 @@ describe('determineSuggestionRange', () => {
null
)

const analysis = determineSuggestionRange(originalQuickBrownTokenization, foxVsAlligatorTokenization);
const analysis = determineSuggestionRange(originalQuickBrownTokenization.tokens, foxVsAlligatorTokenization.tokens, tokenEquality);

assert.sameOrderedMembers(
analysis.tokensToRemove,
Expand All @@ -279,7 +281,7 @@ describe('determineSuggestionRange', () => {
null
)

const analysis = determineSuggestionRange(originalQuickBrownTokenization, dogsAndCatTokenization);
const analysis = determineSuggestionRange(originalQuickBrownTokenization.tokens, dogsAndCatTokenization.tokens, tokenEquality);

assert.sameOrderedMembers(
analysis.tokensToRemove,
Expand Down
Loading