diff --git a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts index 1d4893aab49..8d9f6a938e6 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts @@ -1,15 +1,13 @@ import * as models from '@keymanapp/models-templates'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { applySuggestionCasing, correctAndEnumerate, createDefaultKeep, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; -import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; -import TransformUtils from './transformUtils.js'; - import * as correction from './correction/index.js' +import { applySuggestionCasing, compositeIntermediatePredictions, correctAndEnumerate, createDefaultKeep, dedupeSuggestions, finalizeSuggestions, predictionAutoSelect, processSimilarity, toAnnotatedSuggestion, tupleDisplayOrderSort } from './predict-helpers.js'; +import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; + import { ContextTracker } from './correction/context-tracker.js'; import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/distance-modeler.js'; -import CasingForm = LexicalModelTypes.CasingForm; import Configuration = LexicalModelTypes.Configuration; import Context = LexicalModelTypes.Context; import Distribution = LexicalModelTypes.Distribution; @@ -125,24 +123,6 @@ export class ModelCompositor { const transformId = inputTransform.id; this.initContextTracker(context, transformId); - const allowBksp = TransformUtils.isBackspace(inputTransform); - const allowWhitespace = TransformUtils.isWhitespace(inputTransform); - - const postContext = models.applyTransform(inputTransform, context); - - // TODO: It would be best for the correctAndEnumerate method to return the - // suggestion's prefix, as it already has lots of logic oriented to this. - // The context-tracker used there with v14+ models can determine this more - // robustly. - const truePrefix = this.wordbreak(postContext); - // Only use of `truePrefix`. - const basePrefix = (allowBksp || allowWhitespace) ? truePrefix : this.wordbreak(context); - - // Used to restore whitespaces if operations would remove them. - const currentCasing: CasingForm = lexicalModel.languageUsesCasing - ? detectCurrentCasing(lexicalModel, postContext) - : null; - // Section 1: determine 'prediction roots' - enumerate corrections from most to least likely, // searching for results that yield viable predictions from the model. @@ -160,9 +140,9 @@ export class ModelCompositor { // Properly capitalizes the suggestions based on the existing context casing state. // This may result in duplicates if multiple casing options exist within the // lexicon for a word. (Example: "Apple" the company vs "apple" the fruit.) - for(let tuple of rawPredictions) { - if(currentCasing && currentCasing != 'lower') { - applySuggestionCasing(tuple.components.prediction, basePrefix, this.lexicalModel, currentCasing); + if(lexicalModel.languageUsesCasing) { + for(let tuple of rawPredictions) { + tuple.components.forEach((component) => applySuggestionCasing(component, this.lexicalModel)); } } @@ -171,9 +151,10 @@ export class ModelCompositor { // We want to dedupe before trimming the list so that we can present a full set // of viable distinct suggestions if available. - const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, rawPredictions, context); + const deduplicatedSuggestionTuples = dedupeSuggestions(this.lexicalModel, compositeIntermediatePredictions(rawPredictions), context); // Needs "casing" to be applied first. + const postContext = postContextState?.context ?? models.applyTransform(inputTransform, context); const hasExistingKeep = processSimilarity(this.lexicalModel, deduplicatedSuggestionTuples, context, postContext); // If no existing suggestion directly matches the user-visible version of @@ -222,6 +203,12 @@ export class ModelCompositor { } } + if(suggestions.filter((s) => s.tag == 'keep').length > 1) { + throw new Error(`Unexpected state: multiple keep suggestions exist: ${JSON.stringify(suggestions.filter((s) => s.tag == 'keep'))}`); + } else if(suggestions.filter((s) => s.tag == 'revert').length > 1) { + throw new Error(`Unexpected state: multiple revert suggestions exist! ${JSON.stringify(suggestions.filter((s) => s.tag == 'revert'))}`); + } + // Store the suggestions on the final token of the current context state (if it exists). // Or, once phrase-level suggestions are possible, on whichever token serves as each prediction's root. if(postContextState) { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index 5ccedc31a7c..936837127ae 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -4,7 +4,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbreakers'; import TransformUtils from './transformUtils.js'; -import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; +import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js'; import { ContextTokenLike } from './correction/context-token.js'; import { ContextTokenization, mapWhitespacedTokenization } from './correction/context-tokenization.js'; import { ContextTracker } from './correction/context-tracker.js'; @@ -114,6 +114,21 @@ export interface SuggestionReplacement { transitionId?: number } +export interface TokenizedPredictionData { + /** + * The potential Suggestion + */ + prediction: Suggestion, + /** + * The correction upon which the Suggestion is based + */ + correction: string, + /** + * The unkeyed original string underlying the correction/prediction. + */ + casingRoot: string +} + export interface CompositedPredictionData { /** * The potential Suggestion (or Keep) @@ -176,6 +191,19 @@ export interface PredictionMetadata { preservationTransform?: Transform; } +export interface IntermediateTokenizedPrediction { + /** + * Contains the tokenized components to be used to construct a full + * predictive-text Suggestion, as well as data about the source for each + * component. + */ + components: TokenizedPredictionData[]; + /** + * Tracks common intermediate prediction data, such as its underlying probabilities and its similarity to the actual context. + */ + metadata: PredictionMetadata; +} + export interface IntermediateCompositedPrediction { /** * Contains the fully composited predictive-text Suggestion and its underlying correction string. @@ -187,7 +215,7 @@ export interface IntermediateCompositedPrediction { metadata: PredictionMetadata; } -type IntermediatePrediction = IntermediateCompositedPrediction; +type IntermediatePrediction = IntermediateCompositedPrediction | IntermediateTokenizedPrediction; /** * An enum to be used when categorizing the level of similarity between @@ -279,6 +307,9 @@ export function determineTraversallessCorrectionSequences( }); const suggestionParams = buildCorrectionSequence(transitionEffects, context, new TokenizationResultMapping([correctionRoots[correctionRoots.length - 1]], null)); + if(transformId !== undefined) { + suggestionParams.tokens.forEach((token) => token.correction.sample.id = transformId); + } const tokenizationMapping = mapWhitespacedTokenization(tokenization.left.map((t) => { return {exampleInput: t.text, codepointLength: KMWString.length(t.text)} }), lexicalModel, correction.sample); const tokenizedCorrection = tokenizationMapping.tokenizedTransform; @@ -294,9 +325,6 @@ export function determineTraversallessCorrectionSequences( ...suggestionParams, applyInPost: (p) => { p.metadata.preservationTransform = preservationTransform; - if(transformId) { - p.components.prediction.transformId = transformId; - } } }) } @@ -490,27 +518,37 @@ export interface PredictionParameters { * "unchanged" (root) context used for that suggestion will include the * changes from the entry at index 0 (or possibly, a suggestion derived from it). */ - tokenizedCorrection: ProbabilityMass[], + tokens: { + correction: ProbabilityMass, + casingRoot: string, + autoSelectable: boolean + }[], + + deleteLeft: number; /** * A closure to be applied to the generated suggestion's metadata. * @param entry * @returns */ - applyInPost: (entry: IntermediateCompositedPrediction) => void + applyInPost: (entry: IntermediateTokenizedPrediction) => void; } export function buildCorrectionSequence( transitionEffects: SuggestionReplacement, context: Context, tokenizationCorrection: TokenizationResultMapping -) { +): Omit { const { deleteLeft } = transitionEffects; const rootContext = models.applyTransform({insert: '', deleteLeft}, context); // Replace the existing context with the correction. - const tokenizedCorrections = tokenizationCorrection.matchedResult.map((correction, i) => { + const orderedTokens = tokenizationCorrection.matchingSpace?.orderedTokens; + const tokens: PredictionParameters['tokens'] = []; + + for(let i = 0; i < tokenizationCorrection.matchedResult.length; i++) { + const correction = tokenizationCorrection.matchedResult[i]; /* If we're dealing with the FIRST keystroke of a new sequence, we'll **dramatically** boost * the exponent to ensure only VERY nearby corrections have a chance of winning, and only if * there are significantly more likely words. We only need this to allow very minor fat-finger @@ -539,12 +577,17 @@ export function buildCorrectionSequence( entry.sample.id = transitionEffects.transitionId; } - return entry; - }); + tokens.push({ + correction: entry, + casingRoot: orderedTokens ? orderedTokens[i].exampleInput : entry.sample.insert, + autoSelectable: correctionValidForAutoSelect(entry.sample.insert) + }); + } return { rootContext, - tokenizedCorrection: tokenizedCorrections + tokens, + deleteLeft }; } @@ -576,18 +619,18 @@ export function determineTokenizedCorrectionSequence( // The correction should always be based on the most recent external // transform/transcription ID. if(transition.transitionId !== undefined) { - suggestionParams.tokenizedCorrection.map((t) => t.sample.id = transition.transitionId); + suggestionParams.tokens.map((t) => t.correction.sample.id = transition.transitionId); } const { deleteLeft } = transitionParams; return { ...suggestionParams, - applyInPost: (entry: IntermediateCompositedPrediction) => { + applyInPost: (entry: IntermediateTokenizedPrediction) => { entry.metadata.preservationTransform = tokenization.taillessTrueKeystroke; // // Will need an extra lookup layer if the suggestion is generated from within a cluster. // entry.baseTokenization = transition.final.tokenizationSourceMap.get(tokenization); - entry.components.prediction.transform.deleteLeft = deleteLeft; + entry.components[0].prediction.transform.deleteLeft = deleteLeft; } }; } @@ -618,7 +661,7 @@ export async function correctAndEnumerate( /** * The suggestions generated based on the user's input state. */ - rawPredictions: IntermediateCompositedPrediction[]; + rawPredictions: IntermediateTokenizedPrediction[]; /** * The id of a prior ContextTransition event that triggered a Suggestion found @@ -638,8 +681,7 @@ export async function correctAndEnumerate( const predictionData = determineTraversallessCorrectionSequences(lexicalModel, transformDistribution, context); return { rawPredictions: predictionData.flatMap((entry) => { - const predictions = predictFromCorrectionSequence(lexicalModel, entry.tokenizedCorrection, entry.rootContext, transformDistribution[0]?.sample.id); - predictions.forEach((p) => entry.applyInPost(p)); + const predictions = predictFromCorrectionSequence(lexicalModel, entry); return predictions; }) }; @@ -674,7 +716,7 @@ export async function correctAndEnumerate( const searchModules = tokenizations.map(t => t.tail.searchModule); // Only run the correction search when corrections are enabled. - let rawPredictions: IntermediateCompositedPrediction[] = []; + let rawPredictions: IntermediateTokenizedPrediction[] = []; let bestCorrectionCost: number; for await(const match of getBestTokenMatches(searchModules, timer)) { // Corrections obtained: now to predict from them! @@ -699,8 +741,7 @@ export async function correctAndEnumerate( const corrector = new TokenizationCorrector(tokenization, suggestionRange.tokensToPredict.length, () => true); const predictionPrep = determineTokenizedCorrectionSequence(transition, tokenization, new TokenizationResultMapping([match], corrector)); - const predictions = predictFromCorrectionSequence(lexicalModel, predictionPrep.tokenizedCorrection, predictionPrep.rootContext, transition.transitionId); - predictions.forEach((p) => predictionPrep.applyInPost(p)); + const predictions = predictFromCorrectionSequence(lexicalModel, predictionPrep); // Only set 'best correction' cost when a correction ACTUALLY YIELDS predictions. if(predictions.length > 0 && (bestCorrectionCost === undefined || bestCorrectionCost > match.totalCost)) { @@ -727,7 +768,7 @@ export async function correctAndEnumerate( export function shouldStopSearchingEarly( bestCorrectionCost: number, currentCorrectionCost: number, - rawPredictions: IntermediateCompositedPrediction[] + rawPredictions: IntermediateTokenizedPrediction[] ) { if(currentCorrectionCost >= bestCorrectionCost + CORRECTION_SEARCH_THRESHOLDS.MAX_SEARCH_THRESHOLD) { return true; @@ -770,101 +811,111 @@ export function shouldStopSearchingEarly( */ export function predictFromCorrectionSequence( lexicalModel: LexicalModel, - corrections: ProbabilityMass[], - rootContext: Context, - transitionId: number -): IntermediateCompositedPrediction[] { - let predictionPrefixSequence: ProbabilityMass[] = []; - let tailPredictions: ProbabilityMass[]; - - let currentContext = rootContext; + predictionPrep: PredictionParameters +): IntermediateTokenizedPrediction[] { let successfulPredictions = 0; - for(let i = 0; i < corrections.length; i++) { - const correction = corrections[i].sample; + const correctionTokens = predictionPrep.tokens; + const context = predictionPrep.rootContext; + let currentContext = context; - // Step 2: predict based on the final token. - const predictions = lexicalModel.predict(correction, currentContext); + let prefixProb = 1; + + const predictionComponents = correctionTokens.map((correctionToken, i) => { + const correctionTransform = correctionToken.correction.sample; + const predictions = lexicalModel.predict(correctionTransform, currentContext); + const transitionId = correctionTransform.id; // Failsafe: if there are no matching predictions, create a fake prediction // matching the original text. if(predictions.length != 0) { successfulPredictions++; } else { - predictions.push({ + const failbackSuggestion = { sample: { - transform: correction, - displayAs: correction.insert + transform: correctionTransform, + displayAs: correctionTransform.insert }, // It's not found in the lexicon, so we'll take a low probability for it. // // Edit penalties will be applied via the correction component separately later on. p: Math.exp(-EDIT_DISTANCE_COST_SCALE) - }); + }; + + predictions.push(failbackSuggestion); } - if(i == corrections.length - 1) { - tailPredictions = predictions; - } else { - let bestMatch = predictions.find((p) => KMWString.length(p.sample.transform.insert) == KMWString.length(correction.insert)); - if(!bestMatch) { - bestMatch = predictions[0]; + // Regardless of origin, overwrite the transform's deleteLeft value with what it should actually hold. + predictions.forEach((entry) => { + // Remove the `p` field from the Dummy model's mocked suggestions; these should not be emitted. + delete (entry.sample as Outcome).p; + + entry.sample.transform.deleteLeft = correctionTransform.deleteLeft; + if(transitionId !== undefined) { + entry.sample.transformId = transitionId; + entry.sample.transform.id = transitionId; } + }); + + // Use traversals if possible - extract the most likely entry that is on the traversal, + // rather than predicting (and possibly extending) tokens not adjacent to the caret. + // + // Also, fall back to the actual correction string should prediction not be valid here. + const isLastToken = i == correctionTokens.length - 1; + const predictionsToReturn = isLastToken ? predictions : [predictions[0]]; - predictionPrefixSequence = predictionPrefixSequence.concat(bestMatch); + if(!isLastToken) { + prefixProb *= predictions[0].p; } - // Or maybe per prediction, in some manner? - currentContext = models.applyTransform(correction, currentContext); - } + return predictionsToReturn.map((prediction) => { + return { + prediction: prediction.sample, + correction: correctionTransform.insert, + casingRoot: correctionToken.casingRoot, + // This is tagged on as an addition because we need each final + // token-prediction's probability to be available in the next loop + // below. + predictionProb: prediction.p, + autoSelectable: correctionToken.autoSelectable + }; + }); + }); - if(!successfulPredictions) { + if(successfulPredictions == 0) { return []; } - const predictions: IntermediateCompositedPrediction[] = tailPredictions.map((p) => { - // Concat corrections + predictions for their components. - const predictionSequence = [...predictionPrefixSequence, p]; - const fullPrediction: ProbabilityMass = predictionSequence.reduce((prev, curr) => { - return { - sample: { - transform: models.buildMergedTransform(prev.sample.transform, curr.sample.transform), - displayAs: prev.sample.displayAs + curr.sample.displayAs - }, - p: prev.p * curr.p - }; - }, {sample: {transform: {insert: '', deleteLeft: 0}, displayAs: ''}, p: 1}); - - const fullCorrection: ProbabilityMass = corrections.reduce((prev, curr) => { - return { - sample: prev.sample + curr.sample.insert, - p: prev.p * curr.p - } - }, {sample: '', p: 1}) + // Constructs a common prefix for all but the final token's component. + const correctionCost = correctionTokens.reduce((accum, curr) => accum * curr.correction.p, 1); + const predictionPrefix = predictionComponents + .slice(0, predictionComponents.length-1) + .map((p) => p[0]); - if(transitionId !== undefined) { - fullPrediction.sample.transform.id = transitionId; - fullPrediction.sample.transformId = transitionId; - } + const completePredictionTuples: IntermediateTokenizedPrediction[] = predictionComponents[predictionComponents.length-1].map((tuple) => { + const predictionCost = tuple.predictionProb * prefixProb; - return { - components: { - prediction: fullPrediction.sample, - correction: fullCorrection.sample - }, + const returnVal: IntermediateTokenizedPrediction = { + components: [...predictionPrefix, tuple], metadata: { probabilities: { - prediction: fullPrediction.p, - correction: fullCorrection.p, - total: fullPrediction.p * fullCorrection.p + prediction: predictionCost, + correction: correctionCost, + total: predictionCost * correctionCost }, - autoSelectable: correctionValidForAutoSelect(fullCorrection.sample), + autoSelectable: tuple.autoSelectable, matchLevel: SuggestionSimilarity.none } - }; + } + + returnVal.components[0].prediction.transform.deleteLeft = predictionPrep.deleteLeft; + + return returnVal; }); - return predictions; + completePredictionTuples.forEach((pt) => predictionPrep.applyInPost(pt)); + + return completePredictionTuples; } /** @@ -875,18 +926,60 @@ export function predictFromCorrectionSequence( * @param lexicalModel * @param casingForm */ -export function applySuggestionCasing(suggestion: Suggestion, baseWord: string, lexicalModel: LexicalModel, casingForm: CasingForm) { - // Step 1: does the suggestion replace the whole word? If not, we should extend the suggestion to do so. - let unchangedLength = KMWString.length(baseWord) - suggestion.transform.deleteLeft; +export function applySuggestionCasing(predictionToken: TokenizedPredictionData, lexicalModel: LexicalModel) { + const suggestion = predictionToken.prediction; + + // Step 0: our pattern for generating predictions and corrections already + // enforces them to encompass the whole word. - if(unchangedLength > 0) { - suggestion.transform.deleteLeft += unchangedLength; - suggestion.transform.insert = KMWString.substr(baseWord, 0, unchangedLength) + suggestion.transform.insert; + // Step 1: detect the original token's casing + let casingForm: CasingForm; + + // If we are using the context-tracking engine (when traversals are enabled), + // we just leverage the context token's exampleInput to determine casing. + // + // If it's not available, the correction entry reflects a word-broken piece of + // the original context, with its original casing - so we use that instead. + let casingRoot = predictionToken.casingRoot ? predictionToken.casingRoot : predictionToken.correction; + if(!casingRoot) { + // There's no text in place to verify casing expectations; just leave it + // unchanged. + return; } + casingForm = detectCurrentCasing(lexicalModel, { + left: casingRoot, + startOfBuffer: true, + endOfBuffer: true + }); + // Step 2: Now that the transform affects the whole word, we may safely apply casing rules. - suggestion.transform.insert = lexicalModel.applyCasing(casingForm, suggestion.transform.insert); - suggestion.displayAs = lexicalModel.applyCasing(casingForm, suggestion.displayAs); + if(casingForm && casingForm != 'lower') { + suggestion.transform.insert = lexicalModel.applyCasing(casingForm, suggestion.transform.insert); + suggestion.displayAs = lexicalModel.applyCasing(casingForm, suggestion.displayAs); + } +} + +export function compositeIntermediatePredictions(predictions: IntermediateTokenizedPrediction[]): IntermediateCompositedPrediction[] { + return predictions.map((predictionData) => { + const components = predictionData.components; + + return { + components: components.reduce((total, current) => { + const mergedTransform = models.buildMergedTransform(total.prediction.transform, current.prediction.transform); + const mergedDisplayAs = total.prediction.displayAs + current.prediction.displayAs + + return { + prediction: {...total.prediction, transform: mergedTransform, displayAs: mergedDisplayAs}, + correction: total.correction + current.correction + } + }, { + prediction: {...components[0].prediction, transform: { insert: '', deleteLeft: 0 }, displayAs: ''}, + correction: '' + }), + metadata: predictionData.metadata + }; + }); } /** @@ -1054,6 +1147,7 @@ export function createDefaultKeep( let keepOption = toAnnotatedSuggestion(lexicalModel, keepSuggestion, 'keep'); if(inputTransform.id !== undefined) { keepOption.transformId = inputTransform.id; + keepOption.transform.id = inputTransform.id; } keepOption.matchesModel = false; diff --git a/web/src/test/auto/headless/engine/interfaces/prediction/predictionContext.tests.ts b/web/src/test/auto/headless/engine/interfaces/prediction/predictionContext.tests.ts index b0916069edb..f34006cec5d 100644 --- a/web/src/test/auto/headless/engine/interfaces/prediction/predictionContext.tests.ts +++ b/web/src/test/auto/headless/engine/interfaces/prediction/predictionContext.tests.ts @@ -21,32 +21,32 @@ LMLayerWorker.loadModel(new models.DummyModel({ const appleDummySuggestionSets: Suggestion[][] = [[ // Set 1: { - transform: { insert: 'e', deleteLeft: 0}, + transform: { insert: 'apple', deleteLeft: 0}, displayAs: 'apple', }, { - transform: { insert: 'y', deleteLeft: 0}, + transform: { insert: 'apply', deleteLeft: 0}, displayAs: 'apply' }, { - transform: { insert: 'es', deleteLeft: 0}, + transform: { insert: 'apples', deleteLeft: 0}, displayAs: 'apples' } ], [ // Set 2: { - transform: { insert: 'e', deleteLeft: 0}, + transform: { insert: 'apple', deleteLeft: 0}, displayAs: 'apple', tag: 'keep' }, { - transform: { insert: 'y', deleteLeft: 0}, + transform: { insert: 'apply', deleteLeft: 0}, displayAs: 'apply' }, { - transform: { insert: 's', deleteLeft: 1}, + transform: { insert: 'apps', deleteLeft: 1}, displayAs: 'apps' } ], [ // Set 3: { - transform: { insert: 'ied', deleteLeft: 2}, + transform: { insert: 'applied', deleteLeft: 2}, displayAs: 'applied' } ], [ @@ -102,7 +102,7 @@ describe("PredictionContext", () => { suggestions = updateFake.secondCall.args[0]; assert.deepEqual(suggestions.map((obj) => obj.displayAs), ['apple', 'apply', 'apples']); assert.isNotOk(suggestions.find((obj) => obj.tag == 'keep')); - assert.isNotOk(suggestions.find((obj) => obj.transform.deleteLeft != 0)); + assert.isNotOk(suggestions.find((obj) => obj.transform.deleteLeft != 4)); mock.insertTextBeforeCaret('e'); // appl| + e = apple let transcription = mock.buildTranscriptionFrom(initialMock, null, true); @@ -114,7 +114,7 @@ describe("PredictionContext", () => { suggestions = updateFake.thirdCall.args[0]; assert.deepEqual(suggestions.map((obj) => obj.displayAs), ['apple', 'apply', 'apps']); assert.equal(suggestions.find((obj) => obj.tag == 'keep').displayAs, 'apple'); - assert.equal(suggestions.find((obj) => obj.transform.deleteLeft != 0).displayAs, 'apps'); + assert.isOk(suggestions.find((obj) => obj.displayAs == 'apps')); }); it('ignores outdated predictions', async function () { @@ -141,14 +141,17 @@ describe("PredictionContext", () => { suggestions = updateFake.secondCall.args[0]; assert.deepEqual(suggestions.map((obj) => obj.displayAs), ['apple', 'apply', 'apples']); assert.isNotOk(suggestions.find((obj) => obj.tag == 'keep')); - assert.isNotOk(suggestions.find((obj) => obj.transform.deleteLeft != 0)); + assert.isNotOk(suggestions.find((obj) => obj.transform.deleteLeft != 4)); + mock = Mock.from(initialMock); + mock.insertTextBeforeCaret('e'); const baseTranscription = mock.buildTranscriptionFrom(initialMock, null, true); // Mocking: corresponds to the second set of mocked predictions - round 2 of // 'apple', 'apply', 'apples'. const skippedPromise = langProcessor.predict(baseTranscription, dummiedGetLayer()); + mock = Mock.from(initialMock); mock.insertTextBeforeCaret('e'); // appl| + e = apple const finalTranscription = mock.buildTranscriptionFrom(initialMock, null, true); @@ -220,7 +223,7 @@ describe("PredictionContext", () => { suggestions = updateFake.firstCall.args[0]; assert.deepEqual(suggestions.map((obj) => obj.displayAs), ['apple', 'apply', 'apps']); assert.equal(suggestions.find((obj) => obj.tag == 'keep').displayAs, 'apple'); - assert.equal(suggestions.find((obj) => obj.transform.deleteLeft != 0).displayAs, 'apps'); + assert.isOk(suggestions.find((obj) => obj.displayAs == 'apps')); // Now for the real test. previousTextState = Mock.from(textState); // snapshot it! diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts index 430d9c6c7e0..9595f15527a 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/early-correction-search-stopping.tests.ts @@ -1,15 +1,15 @@ import { assert } from 'chai'; -import { CORRECTION_SEARCH_THRESHOLDS, IntermediateCompositedPrediction, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index"; +import { CORRECTION_SEARCH_THRESHOLDS, IntermediateTokenizedPrediction, ModelCompositor, shouldStopSearchingEarly } from "@keymanapp/lm-worker/test-index"; -function mockIntermediatePrediction(value: number) { +function mockTokenizedPrediction(value: number) { return { metadata: { probabilities: { total: value } } - } as IntermediateCompositedPrediction + } as IntermediateTokenizedPrediction } describe('correction-search: shouldStopSearchingEarly', () => { @@ -22,7 +22,7 @@ describe('correction-search: shouldStopSearchingEarly', () => { assert.equal(predictionProbs.length, ModelCompositor.MAX_SUGGESTIONS, "test setup no longer valid"); // The only part for each entry we actually care about here: .totalProb. - const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry)); + const predictions = predictionProbs.map((entry) => mockTokenizedPrediction(entry)); // Thresholding is performed in log-space. // 0.0501 and 0.0499 are offset on each side of 0.05, the last value in the array defined above. @@ -38,8 +38,8 @@ describe('correction-search: shouldStopSearchingEarly', () => { // // Can technically run the method with an empty array, but the actual scenario would have // at least one prediction present in the "found predictions" array. - assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [mockIntermediatePrediction(Math.exp(-1))])); - assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [mockIntermediatePrediction(Math.exp(-1))])); + assert.isFalse(shouldStopSearchingEarly(baseCost, baseCost + expectedThreshold - 0.01, [mockTokenizedPrediction(Math.exp(-1))])); + assert.isTrue(shouldStopSearchingEarly( baseCost, baseCost + expectedThreshold + 0.01, [mockTokenizedPrediction(Math.exp(-1))])); }); it('stops checking corrections earlier when enough predictions have been found', () => { @@ -48,7 +48,7 @@ describe('correction-search: shouldStopSearchingEarly', () => { // The only part for each entry we actually care about here: .totalProb. /** @type {import('#./predict-helpers.js').CorrectionPredictionTuple[]} */ - const predictions = predictionProbs.map((entry) => mockIntermediatePrediction(entry)); + const predictions = predictionProbs.map((entry) => mockTokenizedPrediction(entry)); const baseCost = 1; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts index a86048b33d4..c012a5ed0b6 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/create-default-keep.tests.ts @@ -93,6 +93,46 @@ const testModelWithCasing = new DummyModel({ }); describe('createDefaultKeep', () => { + it(`creates an 'exact'-match suggestion based on context when no change occurs and no match is found`, () => { + const transformId = 314159; + + const context: Context = { + left: 'appl', + right: '', + startOfBuffer: true, + endOfBuffer: true + }; + + const expectedKeep: IntermediateCompositedPrediction = { + components: { + prediction: { + transform: { + insert: 'appl', + deleteLeft: 4, + id: transformId + }, + transformId, + displayAs: '', + matchesModel: false, + tag: 'keep' + }, + correction: 'appl' + }, + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 * 1 + }, + autoSelectable: false, + matchLevel: SuggestionSimilarity.exact + } + }; + + const tuple = createDefaultKeep(testModelWithCasing, context, { sample: { insert: '', deleteLeft: 0, id: transformId }, p: 1}); + assert.deepEqual(tuple, expectedKeep); + }); + it(`creates an 'exact'-match suggestion based on simple primary input`, () => { const context: Context = { left: 'iphon', diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-tokenized-correction-sequence.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-tokenized-correction-sequence.tests.ts index 4ce8b40b53b..83996b20bb8 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-tokenized-correction-sequence.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-tokenized-correction-sequence.tests.ts @@ -21,7 +21,7 @@ import { ContextState, ContextToken, ContextTokenization, - IntermediateCompositedPrediction, + IntermediateTokenizedPrediction, ModelCompositor, TokenizationResultMapping } from "@keymanapp/lm-worker/test-index"; @@ -78,13 +78,17 @@ describe('determineTokenizedCorrectionSequence', () => { endOfBuffer: true }); - assert.deepEqual(results.tokenizedCorrection, [ + assert.deepEqual(results.tokens, [ { - sample: { - insert: 'fo', - deleteLeft: 0 + correction: { + sample: { + insert: 'fo', + deleteLeft: 0 + }, + p: trueInput.p }, - p: trueInput.p + casingRoot: 'fo', + autoSelectable: true } ]); }); @@ -129,12 +133,20 @@ describe('determineTokenizedCorrectionSequence', () => { endOfBuffer: true }); - assert.equal(results.tokenizedCorrection.length, 1); - assert.deepEqual(results.tokenizedCorrection[0].sample, { - insert: ' ', - deleteLeft: 0 - }); - assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(results.tokens.length, 1); + assert.approximately(results.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(results.tokens, [{ + correction: { + sample: { + insert: ' ', + deleteLeft: 0 + }, + p: results.tokens[0].correction.p + }, + casingRoot: ' ', + autoSelectable: false + }]); }); it(`properly analyzes common-case word-start - beginning a new token`, () => { @@ -178,12 +190,20 @@ describe('determineTokenizedCorrectionSequence', () => { }); - assert.equal(results.tokenizedCorrection.length, 1); - assert.deepEqual(results.tokenizedCorrection[0].sample, { - insert: 'f', - deleteLeft: 0 - }); - assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(results.tokens.length, 1); + assert.approximately(results.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(results.tokens, [{ + correction: { + sample: { + insert: 'f', + deleteLeft: 0 + }, + p: results.tokens[0].correction.p + }, + casingRoot: 'f', + autoSelectable: true + }]); }); it(`properly analyzes post-merge case`, () => { @@ -232,15 +252,17 @@ describe('determineTokenizedCorrectionSequence', () => { endOfBuffer: true }); - assert.deepEqual(results.tokenizedCorrection, [ - { + assert.deepEqual(results.tokens, [{ + correction: { sample: { insert: 'can\'t', deleteLeft: 0 }, p: trueInput.p - } - ]); + }, + casingRoot: 'can\'t', + autoSelectable: true + }]); }); // Will be handled far better after resolving multi-tokenization handling. @@ -285,12 +307,20 @@ describe('determineTokenizedCorrectionSequence', () => { }); - assert.equal(results.tokenizedCorrection.length, 1); - assert.deepEqual(results.tokenizedCorrection[0].sample, { - insert: ' ', - deleteLeft: 0 - }); - assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(results.tokens.length, 1); + assert.approximately(results.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(results.tokens, [{ + correction: { + sample: { + insert: ' ', + deleteLeft: 0 + }, + p: results.tokens[0].correction.p + }, + casingRoot: ' ', + autoSelectable: false + }]); }); it(`properly analyzes complex transition - multi-token replacement`, () => { @@ -335,21 +365,30 @@ describe('determineTokenizedCorrectionSequence', () => { }); - assert.equal(results.tokenizedCorrection.length, 1); - assert.deepEqual(results.tokenizedCorrection[0].sample, { - insert: 'd', - deleteLeft: 0 - }); - assert.approximately(results.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(results.tokens.length, 1); + assert.approximately(results.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(results.tokens, [{ + correction: { + sample: { + insert: 'd', + deleteLeft: 0 + }, + p: results.tokens[0].correction.p + }, + casingRoot: 'd', + autoSelectable: true + }]); - const dummiedTuple: IntermediateCompositedPrediction = { - components: { + const dummiedTuple: IntermediateTokenizedPrediction = { + components: [{ prediction: { transform: { insert: 'dog', deleteLeft: 0 }, displayAs: 'dog' }, - correction: 'd' - }, + correction: 'd', + casingRoot: 'd' + }], metadata: { probabilities: { prediction: .25, diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-traversalless-correction-sequences.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-traversalless-correction-sequences.tests.ts index e17b8ad32f5..eeae00e7d42 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-traversalless-correction-sequences.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-traversalless-correction-sequences.tests.ts @@ -13,7 +13,7 @@ import { LexicalModelTypes } from "@keymanapp/common-types"; import * as wordBreakers from '@keymanapp/models-wordbreakers'; import { KMWString } from '@keymanapp/web-utils'; -import { IntermediateCompositedPrediction, ModelCompositor, determineTraversallessCorrectionSequences, models } from "@keymanapp/lm-worker/test-index"; +import { determineTraversallessCorrectionSequences, IntermediateTokenizedPrediction, ModelCompositor, models } from "@keymanapp/lm-worker/test-index"; import Context = LexicalModelTypes.Context; import DummyModel = models.DummyModel; @@ -80,12 +80,16 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.deepEqual(entry.tokenizedCorrection, [{ - sample: { - insert: 'appl', - deleteLeft: 0 + assert.deepEqual(entry.tokens, [{ + correction: { + sample: { + insert: 'appl', + deleteLeft: 0 + }, + p: trueInput.p }, - p: trueInput.p + casingRoot: 'appl', + autoSelectable: true }]); }); @@ -122,13 +126,19 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.deepEqual(entry.tokenizedCorrection, [{ - sample: { - insert: 'iPhone', - deleteLeft: 0 - }, - p: trueInput.p - }]); + assert.deepEqual(predictionRootEntries[0].tokens, [ + { + correction: { + sample: { + insert: 'iPhone', + deleteLeft: 0 + }, + p: trueInput.p + }, + casingRoot: 'iPhone', + autoSelectable: true + } + ]); }); it(`properly analyzes common-case token-extension - adding a letter to an existing word`, () => { @@ -163,13 +173,19 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.deepEqual(entry.tokenizedCorrection, [{ - sample: { - insert: 'fo', - deleteLeft: 0 - }, - p: trueInput.p - }]); + assert.deepEqual(entry.tokens, [ + { + correction: { + sample: { + insert: 'fo', + deleteLeft: 0 + }, + p: trueInput.p + }, + casingRoot: 'fo', + autoSelectable: true + } + ]); }); it(`properly analyzes common-case whitespace - ending a token and adding a new one`, () => { @@ -204,12 +220,20 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.equal(entry.tokenizedCorrection.length, 1); - assert.deepEqual(entry.tokenizedCorrection[0].sample, { - insert: '', - deleteLeft: 0 - }); - assert.approximately(entry.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(entry.tokens.length, 1); + assert.approximately(entry.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(entry.tokens, [{ + correction: { + sample: { + insert: '', + deleteLeft: 0 + }, + p: entry.tokens[0].correction.p + }, + casingRoot: '', + autoSelectable: false + }]); }); @@ -245,12 +269,20 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.equal(entry.tokenizedCorrection.length, 1); - assert.deepEqual(entry.tokenizedCorrection[0].sample, { - insert: 'f', - deleteLeft: 0 - }); - assert.approximately(entry.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(entry.tokens.length, 1); + assert.approximately(entry.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(entry.tokens, [{ + correction: { + sample: { + insert: 'f', + deleteLeft: 0 + }, + p: entry.tokens[0].correction.p + }, + casingRoot: 'f', + autoSelectable: true + }]); }); it(`properly analyzes post-merge case`, () => { @@ -285,12 +317,16 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.deepEqual(entry.tokenizedCorrection, [{ - sample: { - insert: 'can\'t', - deleteLeft: 0 + assert.deepEqual(entry.tokens, [{ + correction: { + sample: { + insert: 'can\'t', + deleteLeft: 0 + }, + p: trueInput.p }, - p: trueInput.p + casingRoot: 'can\'t', + autoSelectable: true }]); }); @@ -328,12 +364,20 @@ describe('determineTraversallessCorrectionSequences', () => { // } // ); - assert.equal(entry.tokenizedCorrection.length, 1); - assert.deepEqual(entry.tokenizedCorrection[0].sample, { - insert: '', - deleteLeft: 0 - }); - assert.approximately(entry.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(entry.tokens.length, 1); + assert.approximately(entry.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + + assert.deepEqual(entry.tokens, [{ + correction: { + sample: { + insert: '', + deleteLeft: 0 + }, + p: entry.tokens[0].correction.p + }, + casingRoot: '', + autoSelectable: false + }]); }); it(`properly analyzes complex transition - multi-token replacement`, () => { @@ -370,21 +414,30 @@ describe('determineTraversallessCorrectionSequences', () => { } ); - assert.equal(entry.tokenizedCorrection.length, 1); - assert.deepEqual(entry.tokenizedCorrection[0].sample, { - insert: 'd', - deleteLeft: 0 - }); - assert.approximately(entry.tokenizedCorrection[0].p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); + assert.equal(entry.tokens.length, 1); + assert.approximately(entry.tokens[0].correction.p, Math.pow(trueInput.p, ModelCompositor.SINGLE_CHAR_KEY_PROB_EXPONENT), Number.EPSILON*1000); - const dummiedTuple: IntermediateCompositedPrediction = { - components: { + assert.deepEqual(entry.tokens, [{ + correction: { + sample: { + insert: 'd', + deleteLeft: 0 + }, + p: entry.tokens[0].correction.p + }, + casingRoot: 'd', + autoSelectable: true + }]); + + const dummiedTuple: IntermediateTokenizedPrediction = { + components: [{ prediction: { transform: { insert: 'dog', deleteLeft: 0 }, displayAs: 'dog' }, - correction: 'd' - }, + correction: 'd', + casingRoot: 'd' + }], metadata: { probabilities: { prediction: .25, diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-correction-sequence.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-correction-sequence.tests.ts index 64b97af490b..0e9d02f8f72 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-correction-sequence.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/predict-from-correction-sequence.tests.ts @@ -4,16 +4,12 @@ import { assert } from 'chai'; import { deepCopy } from "@keymanapp/web-utils"; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { EDIT_DISTANCE_COST_SCALE, models, predictFromCorrectionSequence, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index"; +import { EDIT_DISTANCE_COST_SCALE, PredictionParameters, models, predictFromCorrectionSequence, tupleDisplayOrderSort } from "@keymanapp/lm-worker/test-index"; import CasingFunction = LexicalModelTypes.CasingFunction; -import Context = LexicalModelTypes.Context; -import Distribution = LexicalModelTypes.Distribution; import DummyModel = models.DummyModel; import Outcome = LexicalModelTypes.Outcome; -import ProbabilityMass = LexicalModelTypes.ProbabilityMass; import Suggestion = LexicalModelTypes.Suggestion; -import Transform = LexicalModelTypes.Transform; // See: developer/src/kmc-model/model-defaults.ts, defaultApplyCasing const applyCasing: CasingFunction = (casing, text) => { @@ -75,21 +71,32 @@ const DUMMY_MODEL_CONFIG = { describe('predictFromCorrectionSequence', () => { describe('on a single correction', () => { it('constructs suggestions matching multiple lexical entries directly - no transform ID', () => { - const context: Context = { - left: '', - right: '', - startOfBuffer: true, - endOfBuffer: true - }; + const transitionID = 12345; - const correctionDistribution: Distribution = [{ - sample: { - insert: 'Its', - deleteLeft: 0 - }, - p: 0.6 - } - ]; + const parameters: PredictionParameters = { + rootContext: { + left: '', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'Its', + deleteLeft: 0, + id: transitionID + }, + p: 0.6 + }, + casingRoot: '', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 + }; const dummied_suggestions: Outcome[] = [ { @@ -114,13 +121,12 @@ describe('predictFromCorrectionSequence', () => { futureSuggestions: [ dummied_suggestions ] }); - const transitionID = 12345; - const predictions = predictFromCorrectionSequence(model, correctionDistribution, context, transitionID); - predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its')); + const predictions = predictFromCorrectionSequence(model, parameters); + predictions.forEach((entry) => assert.equal(entry.components[0].correction, 'Its')); predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6)); predictions.sort(tupleDisplayOrderSort); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.map((s) => { + assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components[0].prediction), dummied_suggestions.map((s) => { delete s.p; s.transformId = transitionID; s.transform.id = transitionID; @@ -132,23 +138,32 @@ describe('predictFromCorrectionSequence', () => { }); it('constructs suggestions matching multiple lexical entries directly - with transform ID', () => { - const context: Context = { - left: '', - right: '', - startOfBuffer: true, - endOfBuffer: true - }; - const transitionID = 314159; - const correctionDistribution: Distribution = [{ - sample: { - insert: 'Its', - deleteLeft: 0, - id: transitionID - }, - p: 0.6 - } - ]; + + const parameters: PredictionParameters = { + rootContext: { + left: '', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'Its', + deleteLeft: 0, + id: transitionID + }, + p: 0.6 + }, + casingRoot: '', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 + }; const dummied_suggestions: Outcome[] = [ { @@ -173,42 +188,52 @@ describe('predictFromCorrectionSequence', () => { futureSuggestions: [ dummied_suggestions ] }); - const predictions = predictFromCorrectionSequence(model, correctionDistribution, context, transitionID); - predictions.forEach((entry) => assert.equal(entry.components.correction, 'Its')); + const predictions = predictFromCorrectionSequence(model, parameters); + predictions.forEach((entry) => assert.equal(entry.components[0].correction, 'Its')); predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 0.6)); predictions.sort(tupleDisplayOrderSort); - assert.sameOrderedMembers(predictions.map((entry) => entry.components.prediction.displayAs), ["it's", "its"]); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.map((entry) => { + assert.sameOrderedMembers(predictions.map((entry) => entry.components[0].prediction.displayAs), ["it's", "its"]); + assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components[0].prediction), dummied_suggestions.map((entry) => { entry = deepCopy(entry); entry.transformId = transitionID; entry.transform.id = transitionID; - delete entry.p; return entry; })); assert.approximately(predictions[0].metadata.probabilities.total, 0.18 * 0.6, 0.00001); assert.approximately(predictions[1].metadata.probabilities.total, 0.02 * 0.6, 0.00001); - predictions.forEach((prediction) => assert.equal(prediction.components.prediction.transformId, transitionID)); + predictions.forEach((prediction) => assert.equal(prediction.components[0].prediction.transformId, transitionID)); }); it('constructs suggestions without input (as if after a context reset)', () => { - const context: Context = { - left: 'appl', - right: '', - startOfBuffer: true, - endOfBuffer: true + const transitionID = 271828; + + const parameters: PredictionParameters = { + rootContext: { + left: '', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'appl', + deleteLeft: 0, + id: transitionID + }, + p: 1 + }, + casingRoot: 'appl', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 }; - const correctionDistribution: Distribution = [{ - sample: { - insert: 'appl', - deleteLeft: 4 - }, - p: 1 - } - ]; - const dummied_suggestions: Outcome[] = [ { transform: { @@ -225,60 +250,79 @@ describe('predictFromCorrectionSequence', () => { futureSuggestions: [ dummied_suggestions ] }); - const transitionID = 12345; - const predictions = predictFromCorrectionSequence(model, correctionDistribution, context, transitionID); - predictions.forEach((entry) => assert.equal(entry.components.correction, 'appl')); + const predictions = predictFromCorrectionSequence(model, parameters); + predictions.forEach((entry) => assert.deepEqual(entry.components.map((c => c.correction)), ['appl'])); predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, 1)); predictions.sort(tupleDisplayOrderSort); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), dummied_suggestions.map((s) => { + assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.map((c) => c.prediction)), [dummied_suggestions.map((s) => { delete s.p; s.transformId = transitionID; s.transform.id = transitionID; return s; - })); + })]); }); }); describe('on a sequence of corrections', () => { it('returns results even if some correction tokens lack predictions', () => { - const context: Context = { - left: 'i want to eat a ', - right: '', - startOfBuffer: true, - endOfBuffer: true - }; + const transitionID = 101; - const correctionSequence: Distribution = [ - { - sample: { - insert: 'golden', - deleteLeft: 0 - }, - p: 0.1 - }, { - sample: { - insert: ' ', - deleteLeft: 0 - }, - p: 0.2 - }, { - sample: { - insert: 'app', - deleteLeft: 0 - }, - p: 0.2 - } - ]; + const parameters: PredictionParameters = { + rootContext: { + left: 'i want to eat a ', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'g', + deleteLeft: 0, + id: transitionID + }, + p: 0.1 + }, + casingRoot: 'g', + autoSelectable: true + }, { + correction: { + sample: { + insert: ' ', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: ' ', + autoSelectable: true + }, { + correction: { + sample: { + insert: 'apple', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: 'apple', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 + }; const dummied_suggestion_sequences: Outcome[][] = [ [ { transform: { - insert: "golden", + insert: "g", deleteLeft: 0 }, - displayAs: "golden", + displayAs: "g", p: 0.1 } ], @@ -295,70 +339,105 @@ describe('predictFromCorrectionSequence', () => { ] ]; - const transitionID = 101; - const expected_prediction: ProbabilityMass = { - sample: { + const expected_predictions: Suggestion[] = [ + { transform: { - insert: 'golden apple', + insert: 'g', + deleteLeft: 0, + id: transitionID + }, + displayAs: 'g', + transformId: transitionID, + }, { + transform: { + insert: ' ', deleteLeft: 0, id: transitionID }, - displayAs: 'golden apple', + displayAs: ' ', transformId: transitionID - }, p: dummied_suggestion_sequences.map((dist) => { + }, { + transform: { + insert: 'apple', + deleteLeft: 0, + id: transitionID + }, + displayAs: 'apple', + transformId: transitionID + } + ]; + + const expected_prediction_p = dummied_suggestion_sequences.map((dist) => { return dist[0] }).reduce((accum, curr) => { return accum * (curr ? curr.p : Math.exp(-EDIT_DISTANCE_COST_SCALE)) - }, 1) - } + }, 1); const model = new DummyModel({ ...DUMMY_MODEL_CONFIG, futureSuggestions: dummied_suggestion_sequences }); - const predictions = predictFromCorrectionSequence(model, correctionSequence, context, transitionID); - predictions.forEach((entry) => assert.equal(entry.components.correction, 'golden app')); - predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, correctionSequence.reduce((accum, curr) => accum * curr.p, 1))); + const predictions = predictFromCorrectionSequence(model, parameters); + predictions.forEach((entry) => assert.deepEqual(entry.components.map((c) => c.correction), ['g', ' ', 'apple'])); + predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, parameters.tokens.reduce((accum, curr) => accum * curr.correction.p, 1))); predictions.sort(tupleDisplayOrderSort); - assert.equal(predictions[0].components.prediction.transform.insert, 'golden apple'); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), [expected_prediction.sample]); + assert.sameDeepOrderedMembers(predictions[0].components.map((c) => c.prediction), expected_predictions); - assert.approximately(predictions[0].metadata.probabilities.prediction, expected_prediction.p, 0.00001); - assert.equal(predictions[0].components.prediction.transformId, transitionID); + assert.approximately(predictions[0].metadata.probabilities.prediction, expected_prediction_p, 0.00001); }); it('returns no results if all correction tokens lack predictions', () => { - const context: Context = { - left: 'i want to eat a ', - right: '', - startOfBuffer: true, - endOfBuffer: true + const transitionID = 3; + + const parameters: PredictionParameters = { + rootContext: { + left: 'i want to eat a ', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'golden', + deleteLeft: 0, + id: transitionID + }, + p: 0.1 + }, + casingRoot: 'golden', + autoSelectable: true + }, { + correction: { + sample: { + insert: ' ', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: ' ', + autoSelectable: true + }, { + correction: { + sample: { + insert: 'app', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: 'app', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 }; - const correctionSequence: Distribution = [ - { - sample: { - insert: 'golden', - deleteLeft: 0 - }, - p: 0.1 - }, { - sample: { - insert: ' ', - deleteLeft: 0 - }, - p: 0.2 - }, { - sample: { - insert: 'app', - deleteLeft: 0 - }, - p: 0.2 - } - ]; - const dummied_suggestion_sequences: Outcome[][] = [ [], [], @@ -370,39 +449,59 @@ describe('predictFromCorrectionSequence', () => { futureSuggestions: dummied_suggestion_sequences }); - const predictions = predictFromCorrectionSequence(model, correctionSequence, context, 3); + const predictions = predictFromCorrectionSequence(model, parameters); assert.deepEqual(predictions, []); }); it('uses only the best suggestion for non-final corrected tokens', () => { - const context: Context = { - left: 'i want to eat a ', - right: '', - startOfBuffer: true, - endOfBuffer: true - }; + const transitionID = 42; - const correctionSequence: Distribution = [ - { - sample: { - insert: 'g', - deleteLeft: 0 - }, - p: 0.1 - }, { - sample: { - insert: ' ', - deleteLeft: 0 - }, - p: 0.2 - }, { - sample: { - insert: 'app', - deleteLeft: 0 - }, - p: 0.2 - } - ]; + const parameters: PredictionParameters = { + rootContext: { + left: 'i want to eat a ', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'g', + deleteLeft: 0, + id: transitionID + }, + p: 0.1 + }, + casingRoot: 'g', + autoSelectable: true + }, { + correction: { + sample: { + insert: ' ', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: ' ', + autoSelectable: true + }, { + correction: { + sample: { + insert: 'app', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: 'app', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 + }; const dummied_suggestion_sequences: Outcome[][] = [ [ @@ -442,72 +541,109 @@ describe('predictFromCorrectionSequence', () => { ] ]; - const transitionID = 42; - const expected_prediction: ProbabilityMass = { - sample: { + const expected_prediction_p = dummied_suggestion_sequences + .map((dist) => { + return dist[0] + }).reduce((accum, curr) => { + return accum * (curr ? curr.p : Math.exp(-EDIT_DISTANCE_COST_SCALE)) + }, 1); + + const expected_predictions: Suggestion[] = [ + { transform: { - insert: 'golden apple', + insert: 'golden', deleteLeft: 0, id: transitionID }, - displayAs: 'golden apple', - transformId: 42 - }, p: dummied_suggestion_sequences.map((dist) => { - return dist[0] - }).reduce((accum, curr) => { - return accum * (curr ? curr.p : Math.exp(-EDIT_DISTANCE_COST_SCALE)) - }, 1) - } + displayAs: 'golden', + transformId: transitionID + }, { + transform: { + insert: ' ', + deleteLeft: 0, + id: transitionID + }, + displayAs: ' ', + transformId: transitionID + }, { + transform: { + insert: 'apple', + deleteLeft: 0, + id: transitionID + }, + displayAs: 'apple', + transformId: transitionID + } + ]; const model = new DummyModel({ ...DUMMY_MODEL_CONFIG, futureSuggestions: dummied_suggestion_sequences }); - const predictions = predictFromCorrectionSequence(model, correctionSequence, context, transitionID); + const predictions = predictFromCorrectionSequence(model, parameters); // There should be no variations with 'green' or 'gray' apples. assert.equal(predictions.length, 1); - predictions.forEach((entry) => assert.equal(entry.components.correction, 'g app')); - predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, correctionSequence.reduce((accum, curr) => accum * curr.p, 1))); + predictions.forEach((entry) => assert.deepEqual(entry.components.map((c) => c.correction), ['g', ' ', 'app'])); + predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, parameters.tokens.reduce((accum, curr) => accum * curr.correction.p, 1))); predictions.sort(tupleDisplayOrderSort); - assert.equal(predictions[0].components.prediction.transform.insert, 'golden apple'); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), [expected_prediction.sample]); + assert.deepEqual(predictions[0].components.map((c) => c.prediction.transform.insert), ['golden', ' ', 'apple']); + assert.sameDeepOrderedMembers(predictions[0].components.map((entry) => entry.prediction), expected_predictions); - assert.approximately(predictions[0].metadata.probabilities.prediction, expected_prediction.p, 0.00001); - assert.equal(predictions[0].components.prediction.transformId, transitionID); + assert.approximately(predictions[0].metadata.probabilities.prediction, expected_prediction_p, 0.00001); }); it('uses all suggestions generated from context-final correction-tokens', () => { - const context: Context = { - left: 'i want to eat a ', - right: '', - startOfBuffer: true, - endOfBuffer: true - }; + const transitionID = 13; - const correctionSequence: Distribution = [ - { - sample: { - insert: 'golden', - deleteLeft: 0 - }, - p: 0.1 - }, { - sample: { - insert: ' ', - deleteLeft: 0 - }, - p: 0.2 - }, { - sample: { - insert: 'app', - deleteLeft: 0 - }, - p: 0.2 - } - ]; + const parameters: PredictionParameters = { + rootContext: { + left: 'i want to eat a ', + right: '', + startOfBuffer: true, + endOfBuffer: true + }, + tokens: [ + { + correction: { + sample: { + insert: 'golden', + deleteLeft: 0, + id: transitionID + }, + p: 0.1 + }, + casingRoot: 'golden', + autoSelectable: true + }, { + correction: { + sample: { + insert: ' ', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: ' ', + autoSelectable: true + }, { + correction: { + sample: { + insert: 'app', + deleteLeft: 0, + id: transitionID + }, + p: 0.2 + }, + casingRoot: 'app', + autoSelectable: true + } + ], + applyInPost: (x) => x, + deleteLeft: 0 + }; const dummied_suggestion_sequences: Outcome[][] = [ [ @@ -549,29 +685,40 @@ describe('predictFromCorrectionSequence', () => { const tailIndex = dummied_suggestion_sequences.length - 1; - const transitionID = 13; - const expected_predictions: ProbabilityMass[] = dummied_suggestion_sequences[tailIndex].map((p) => { - const expectedText = `golden ${p.transform.insert}`; + const expected_prediction_prefix_p = dummied_suggestion_sequences + .slice(0, dummied_suggestion_sequences.length - 1) + .map((dist) => { + return dist[0] + }).reduce((accum, curr) => { + return accum * (curr ? curr.p : Math.exp(-EDIT_DISTANCE_COST_SCALE)) + }, 1); - return { - sample: { - transform: { - insert: expectedText, - deleteLeft: 0, - id: transitionID - }, - displayAs: expectedText, - transformId: transitionID - }, p: dummied_suggestion_sequences.map((dist) => { - return dist[0] - }).reduce((accum, curr, index) => { - if(tailIndex == index) { - return accum * p.p; - } else { - return accum * (curr ? curr.p : Math.exp(-EDIT_DISTANCE_COST_SCALE)) - } - }, 1) - }; + const expected_prediction_prefix: Suggestion[] = [ + { + transform: { + insert: 'golden', + deleteLeft: 0, + id: transitionID + }, + displayAs: 'golden', + transformId: transitionID + }, { + transform: { + insert: ' ', + deleteLeft: 0, + id: transitionID + }, + displayAs: ' ', + transformId: transitionID + } + ]; + + const expected_prediction_sequences: Suggestion[][] = dummied_suggestion_sequences[tailIndex].map((p) => { + return [...expected_prediction_prefix, p]; + }); + + const expected_prediction_seq_probs: number[] = dummied_suggestion_sequences[tailIndex].map((p) => { + return p.p * expected_prediction_prefix_p; }); const model = new DummyModel({ @@ -579,22 +726,17 @@ describe('predictFromCorrectionSequence', () => { futureSuggestions: dummied_suggestion_sequences }); - const predictions = predictFromCorrectionSequence(model, correctionSequence, context, transitionID); + const predictions = predictFromCorrectionSequence(model, parameters); assert.equal(predictions.length, dummied_suggestion_sequences[dummied_suggestion_sequences.length - 1].length); - predictions.forEach((entry) => assert.equal(entry.components.correction, 'golden app')); - predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, correctionSequence.reduce((accum, curr) => accum * curr.p, 1))); + predictions.forEach((entry) => assert.deepEqual(entry.components.map((c) => c.correction), ['golden', ' ', 'app'])); + predictions.forEach((entry) => assert.equal(entry.metadata.probabilities.correction, parameters.tokens.reduce((accum, curr) => accum * curr.correction.p, 1))); predictions.sort(tupleDisplayOrderSort); - assert.sameOrderedMembers( - predictions.map((t) => t.components.prediction.transform.insert), - ['golden apple', 'golden application', 'golden appetizer'] - ); - assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.prediction), expected_predictions.map((p => p.sample))); + assert.sameDeepOrderedMembers(predictions.map((entry) => entry.components.map((c) => c.prediction)), expected_prediction_sequences); for(let i = 0; i < predictions.length; i++) { - assert.approximately(predictions[i].metadata.probabilities.prediction, expected_predictions[i].p, 0.00001, `Expected probabilty mismatch at index ${i}`); - assert.equal(predictions[i].components.prediction.transformId, transitionID); + assert.approximately(predictions[i].metadata.probabilities.prediction, expected_prediction_seq_probs[i], 0.00001, `Expected probabilty mismatch at index ${i}`); } }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts index a485fecd053..e8ad9d2e8fe 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/suggestion-similarity.tests.ts @@ -277,6 +277,46 @@ describe('processSimilarity', () => { assert.deepEqual(it_is.components.prediction, keep_it_is); }); + it('operates properly when no transition in context occurs', () => { + const transformId = 314159; + + const context: Context = { + left: 'appl', + right: '', + startOfBuffer: true, + endOfBuffer: true + }; + + const distribution: IntermediateCompositedPrediction[] = [ + { + components: { + prediction: { + transform: { + insert: 'apple', + deleteLeft: 4, + id: transformId + }, + transformId, + displayAs: 'apple' + }, + correction: 'appl' + }, + metadata: { + probabilities: { + prediction: 1, + correction: 1, + total: 1 + }, + autoSelectable: true + } + } + ]; + + const result = processSimilarity(testModelWithCasing, distribution, context, context); + assert.isFalse(result); + assert.equal(distribution[0].metadata.matchLevel, SuggestionSimilarity.none); + }); + describe('with casing', () => { // If we ever add a mode that can force lowercase for certain words even // when the context is title-cased or upper-cased, this scenario would be diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts index dd586eab646..b54b688614a 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/suggestion-casing.tests.ts @@ -13,7 +13,7 @@ import * as wordBreakers from '@keymanapp/models-wordbreakers'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { applySuggestionCasing, models } from '@keymanapp/lm-worker/test-index'; +import { TokenizedPredictionData, applySuggestionCasing, models } from '@keymanapp/lm-worker/test-index'; import CasingFunction = LexicalModelTypes.CasingFunction; import TrieModel = models.TrieModel; @@ -45,117 +45,137 @@ describe('applySuggestionCasing', function() { ); it('properly cases suggestions with no suggestion root', function() { - let suggestion = { - transform: { - insert: 'the', - deleteLeft: 0 + let suggestion: TokenizedPredictionData[] = [{ + prediction: { + transform: { + insert: 'the', + deleteLeft: 0 + }, + displayAs: 'the' }, - displayAs: 'the' - }; - - applySuggestionCasing(suggestion, '', plainCasedModel, 'initial'); - assert.equal(suggestion.displayAs, 'The'); - assert.equal(suggestion.transform.insert, 'The'); - - suggestion = { - transform: { - insert: 'thE', - deleteLeft: 0 - }, - displayAs: 'thE' - }; - - applySuggestionCasing(suggestion, '', plainCasedModel, 'initial'); - assert.equal(suggestion.displayAs, 'ThE'); - assert.equal(suggestion.transform.insert, 'ThE'); - - suggestion = { - transform: { - insert: 'the', - deleteLeft: 0 + correction: '', + casingRoot: 'th' + }]; + + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'the'); + assert.equal(suggestion[0].prediction.transform.insert, 'the'); + + suggestion = [{ + prediction: { + transform: { + insert: 'ThE', + deleteLeft: 0 + }, + displayAs: 'ThE' }, - displayAs: 'the' - }; + correction: '', + casingRoot: 'Th' + }]; - applySuggestionCasing(suggestion, '', plainCasedModel, 'upper'); - assert.equal(suggestion.displayAs, 'THE'); - assert.equal(suggestion.transform.insert, 'THE'); + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'ThE'); + assert.equal(suggestion[0].prediction.transform.insert, 'ThE'); }); it('properly cases suggestions that fully replace the suggestion root', function() { - let suggestion = { - transform: { - insert: 'therefore', - deleteLeft: 3 + let suggestion: TokenizedPredictionData[] = [{ + prediction: { + transform: { + insert: 'therefore', + deleteLeft: 3 + }, + displayAs: 'therefore' }, - displayAs: 'therefore' - }; - - applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial'); - assert.equal(suggestion.displayAs, 'Therefore'); - assert.equal(suggestion.transform.insert, 'Therefore'); - - suggestion = { - transform: { - insert: 'thereFore', - deleteLeft: 3 + correction: 'The', + casingRoot: 'Th' + }]; + + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'Therefore'); + assert.equal(suggestion[0].prediction.transform.insert, 'Therefore'); + + suggestion = [{ + prediction: { + transform: { + insert: 'thereFore', + deleteLeft: 3 + }, + displayAs: 'thereFore' }, - displayAs: 'thereFore' - }; - - applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial'); - assert.equal(suggestion.displayAs, 'ThereFore'); - assert.equal(suggestion.transform.insert, 'ThereFore'); - - suggestion = { - transform: { - insert: 'therefore', - deleteLeft: 3 + correction: 'The', + casingRoot: 'Th' + }]; + + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'ThereFore'); + assert.equal(suggestion[0].prediction.transform.insert, 'ThereFore'); + + suggestion = [{ + prediction: { + transform: { + insert: 'therefore', + deleteLeft: 3 + }, + displayAs: 'therefore' }, - displayAs: 'therefore' - }; + correction: 'THE', + casingRoot: 'TH' + }]; - applySuggestionCasing(suggestion, 'the', plainCasedModel, 'upper'); - assert.equal(suggestion.displayAs, 'THEREFORE'); - assert.equal(suggestion.transform.insert, 'THEREFORE'); + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'THEREFORE'); + assert.equal(suggestion[0].prediction.transform.insert, 'THEREFORE'); }); it('properly cases suggestions that do not fully replace the suggestion root', function() { - let suggestion = { - transform: { - insert: 'erefore', - deleteLeft: 1 + let suggestion: TokenizedPredictionData[] = [{ + prediction: { + transform: { + insert: 'therefore', + deleteLeft: 3 + }, + displayAs: 'therefore' }, - displayAs: 'therefore' - }; + correction: 'The', + casingRoot: 'Th' + }]; // When integrated, the 'the' string comes from a wordbreak operation on the current context. - applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial'); - assert.equal(suggestion.displayAs, 'Therefore'); - assert.equal(suggestion.transform.insert, 'Therefore'); - - suggestion = { - transform: { - insert: 'ereFore', - deleteLeft: 1 + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'Therefore'); + assert.equal(suggestion[0].prediction.transform.insert, 'Therefore'); + + suggestion = [{ + prediction: { + transform: { + insert: 'ThereFore', + deleteLeft: 3 + }, + displayAs: 'thereFore' }, - displayAs: 'thereFore' - }; - - applySuggestionCasing(suggestion, 'the', plainCasedModel, 'initial'); - assert.equal(suggestion.displayAs, 'ThereFore'); - assert.equal(suggestion.transform.insert, 'ThereFore'); - - suggestion = { - transform: { - insert: 'erefore', - deleteLeft: 1 + correction: 'The', + casingRoot: 'Th' + }]; + + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'ThereFore'); + assert.equal(suggestion[0].prediction.transform.insert, 'ThereFore'); + + suggestion = [{ + prediction: { + transform: { + insert: 'therefore', + deleteLeft: 3 + }, + displayAs: 'therefore' }, - displayAs: 'therefore' - }; + correction: 'THE', + casingRoot: 'TH' + }]; - applySuggestionCasing(suggestion, 'the', plainCasedModel, 'upper'); - assert.equal(suggestion.displayAs, 'THEREFORE'); - assert.equal(suggestion.transform.insert, 'THEREFORE'); + applySuggestionCasing(suggestion[0], plainCasedModel); + assert.equal(suggestion[0].prediction.displayAs, 'THEREFORE'); + assert.equal(suggestion[0].prediction.transform.insert, 'THEREFORE'); }); }); \ No newline at end of file