keymanapp · jahorton · Jun 4, 2026
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts
@@ -108,38 +108,21 @@ export class ContextTokenization {
    */
   readonly tokens: ContextToken[];
 
-  /**
-   * The portion of edits from the true input keystroke that are not part of the
-   * final entry in `token`.  If `null`, all edits are considered part of the
-   * final token's contents.
-   *
-   * If the final token is new due to a newly-introduced wordboundary traversed
-   * by the keystroke, this will generally be set to an empty transform that
-   * 'finalizes' the previous tail token.
-   *
-   * (Refer to #12494 for an example case.)
-   */
-  readonly taillessTrueKeystroke: Transform;
-
   constructor(priorToClone: ContextTokenization);
   constructor(tokens: ContextToken[]);
-  constructor(tokens: ContextToken[], alignment: TransitionEdge, taillessTrueKeystroke: Transform);
+  constructor(tokens: ContextToken[], alignment: TransitionEdge); // NOTE(review): `alignment` is accepted by this overload but never read by the implementation below - confirm this is intended.
   constructor(
-    param1: ContextToken[] | ContextTokenization,
-    tokenizationPath?: TransitionEdge,
-    taillessTrueKeystroke?: Transform
+    param1: ContextToken[] | ContextTokenization // either a non-empty token list (array is copied) or a prior tokenization (each token is re-wrapped via `new ContextToken`)
   ) {
     if(!(param1 instanceof ContextTokenization)) {
       const tokens = param1;
       if(!tokens || tokens.length == 0) {
         throw new Error("ContextTokenization requires at least one existing ContextToken");
       }
       this.tokens = [].concat(tokens);
-      this.taillessTrueKeystroke = taillessTrueKeystroke;
     } else {
       const priorToClone = param1;
       this.tokens = priorToClone.tokens.map((entry) => new ContextToken(entry));
-      this.taillessTrueKeystroke = priorToClone.taillessTrueKeystroke;
     }
   }
 
@@ -391,7 +374,7 @@ export class ContextTokenization {
       tokenization.push(token);
     }
 
-    return new ContextTokenization(this.tokens.slice(0, sliceIndex).concat(tokenization), null, this.taillessTrueKeystroke);
+    return new ContextTokenization(this.tokens.slice(0, sliceIndex).concat(tokenization));
   }
 
   /**
@@ -486,11 +469,7 @@ export class ContextTokenization {
       affectedToken = null;
     }
 
-    return new ContextTokenization(
-      this.tokens.slice(0, sliceIndex).concat(tailTokenization),
-      null,
-      determineTaillessTrueKeystroke(transitionEdge)
-    );
+    return new ContextTokenization(this.tokens.slice(0, sliceIndex).concat(tailTokenization));
   }
 }
 
@@ -1175,59 +1154,4 @@ export function assembleTransforms(stackedInserts: string[], stackedDeletes: num
   }
 
   return transformMap;
-}
-
-/**
- * Used to construct and represent the part of the incoming transform that does
- * not land as part of the final token in the resulting context.  This component
- * should be preserved by any suggestions that get applied.
- * @param tokenizationAnalysis
- * @returns
- */
-export function determineTaillessTrueKeystroke(tokenizationAnalysis: TransitionEdge) {
-  // undefined by default; we haven't yet determined if we're still affecting
-  // the same token that was the tail in the previous tokenization state.
-  let taillessTrueKeystroke: Transform;
-
-  // If tokens were inserted, emit an empty transform; this prevents
-  // suggestions from replacing the "current" token.
-  const bestTokenizedInput = tokenizationAnalysis.inputs[0].sample;
-  if(bestTokenizedInput.has(1)) {
-    // Sets a default transform that will be returned even if the main
-    // transform body lies entirely within a new token.
-    taillessTrueKeystroke = { insert: '', deleteLeft: 0 };
-
-    // While the .size() > 1 case could also land here, it is ALSO covered
-    // by the loop that follows, without fail.
-  }
-
-  const transformKeys = [...tokenizationAnalysis.inputs[0].sample.keys()];
-  transformKeys.pop();
-
-  for(let i of transformKeys) {
-    /*
-      * Thinking ahead to multitokenization:
-      *
-      * If what we have is not on the "true" tokenization, then... we need to
-      * do multitoken effects, right?  We're basing new suggestions based on a
-      * state that does not currently exist!  We'd need to enforce THAT state,
-      * *then* do the suggestion!
-      * - Which gets fun if we auto-apply such a case, as the new "true" tokenization
-      *   no longer results directly from the true input.
-      *
-      * If we give tokens unique IDs on first creation, we could backtrace to
-      * find the most recent common ancestor.
-      * - simple cases (same 'token', but different input transform lengths/effects)
-      *   will have the same prior token ID
-      */
-    const primaryInput = tokenizationAnalysis.inputs[0].sample.get(i);
-    if(!taillessTrueKeystroke) {
-      taillessTrueKeystroke = {...primaryInput};
-    } else {
-      taillessTrueKeystroke.insert += primaryInput.insert;
-      taillessTrueKeystroke.deleteLeft += primaryInput.deleteLeft;
-    }
-  }
-
-  return taillessTrueKeystroke;
 }
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/transition-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/transition-helpers.ts
@@ -130,7 +130,7 @@ export function transitionTokenizations(
 
       // Following call:  is actually designed to build SubstitutionQuotientSpurs.
       const transitionedTokenization = rootTokenization.evaluateTransition(precomp[1], trueInput.id, bestProb);
-      const remadeTokenization = new ContextTokenization(transitionedTokenization.tokens, subset.transitionEdges.get(rootTokenization), transitionedTokenization.taillessTrueKeystroke);
+      const remadeTokenization = new ContextTokenization(transitionedTokenization.tokens, subset.transitionEdges.get(rootTokenization));
 
       // If the last token is empty and has no flag for a revertable transition,
       // attempt to copy the previous token's revertable transition flag.

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -6,7 +6,7 @@ import { searchForProperty, WordBreakProperty } from '@keymanapp/models-wordbrea
 import { TransformUtils } from './transformUtils.js';
 import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
 import { ContextTokenLike } from './correction/context-token.js';
-import { ContextTokenization, mapWhitespacedTokenization } from './correction/context-tokenization.js';
+import { ContextTokenization } from './correction/context-tokenization.js';
 import { ContextTracker } from './correction/context-tracker.js';
 import { ContextState, determineContextSlideTransform } from './correction/context-state.js';
 import { ContextTransition } from './correction/context-transition.js';
@@ -181,12 +181,6 @@ export interface PredictionMetadata {
    * available upon initial construction of this type.
    */
   matchLevel?: SuggestionSimilarity;
-
-  /**
-   * Text from the triggering input that should _not_ be affected by the
-   * prediction.
-   */
-  preservationTransform?: Transform;
 }
 
 export interface IntermediateTokenizedPrediction {
@@ -312,21 +306,9 @@ export function determineTraversallessCorrectionSequences(
       suggestionParams.tokens.forEach((token) => token.correction.sample.id = transformId);
     }
 
-    const tokenizationMapping = mapWhitespacedTokenization(tokenization.left.map((t) => { return {exampleInput: t.text, codepointLength: KMWString.length(t.text)} }), lexicalModel, correction.sample);
-    const tokenizedCorrection = tokenizationMapping.tokenizedTransform;
-    const tokenizedCorrectionEntries = [...tokenizedCorrection.values()];
-
-    // IF:  array has multiple entries, then build the preservation-transform as below, including the deleteLeft.
-    // If not, don't make one!
-    const preservationTransform = tokenizedCorrectionEntries.slice(0, -1).reduce((accum, curr) => {
-      return { insert: accum.insert + curr.insert, deleteLeft: accum.deleteLeft + curr.deleteLeft };
-    }, { insert: '', deleteLeft: 0, id: correction.sample.id});
-
     returnedPredictionData.push({
       ...suggestionParams,
-      applyInPost: (p) => {
-        p.metadata.preservationTransform = preservationTransform;
-      }
+      applyInPost: (p) => {}
     })
   }
 
@@ -616,16 +598,9 @@ export function determineTokenizedCorrectionSequence(
     suggestionParams.tokens.forEach((t) => t.correction.sample.id = transition.transitionId);
   }
 
-  const { deleteLeft } = transitionParams;
-
   return {
     ...suggestionParams,
-    applyInPost: (entry: IntermediateTokenizedPrediction) => {
-      entry.metadata.preservationTransform = tokenization.taillessTrueKeystroke;
-      // // Will need an extra lookup layer if the suggestion is generated from within a cluster.
-      // entry.baseTokenization = transition.final.tokenizationSourceMap.get(tokenization);
-      entry.components[0].prediction.transform.deleteLeft = deleteLeft;
-    }
+    applyInPost: (entry) => {}
   };
 }
 
@@ -1328,24 +1303,6 @@ export function finalizeSuggestions(
   const suggestions = deduplicatedSuggestionTuples.map((tuple) => {
     const prediction = tuple.components.prediction;
 
-    // If this is a suggestion after any form of wordbreak input, make sure we preserve any components
-    // from prior tokens!
-    //
-    // Note:  may need adjustment if/when supporting phrase-level correction.
-    if(tuple.metadata.preservationTransform) {
-      const mergedTransform = {
-        ...models.buildMergedTransform(tuple.metadata.preservationTransform, {...prediction.transform, deleteLeft: 0}),
-        deleteLeft: prediction.transform.deleteLeft
-      };
-
-      // Temporarily and locally drops 'readonly' semantics so that we can reassign the transform.
-      // See https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#improved-control-over-mapped-type-modifiers
-      let mutableSuggestion = prediction as {-readonly [transform in keyof Suggestion]: Suggestion[transform]};
-
-      // Assignment via by-reference behavior, as suggestion is an object
-      mutableSuggestion.transform = mergedTransform;
-    }
-
     // Is sometimes not set during unit tests.
     if(prediction.transformId !== undefined) {
       prediction.transform.id = prediction.transformId;

diff --git a/...rc/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts b/...rc/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts
@@ -247,9 +247,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform));
       assert.isNotNull(newContextMatch?.final);
       assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens);
-      // We want to preserve the added whitespace when predicting a token that follows after it.
-
-      assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 });
 
       // The 'wordbreak' transform
       let state = newContextMatch?.final;
@@ -275,8 +272,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform));
       assert.isNotNull(newContextMatch?.final);
       assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens);
-      // We want to preserve the added whitespace when predicting a token that follows after it.
-      assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 });
 
       // The 'wordbreak' transform
       let state = newContextMatch?.final;
@@ -319,7 +314,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform));
       assert.isNotNull(newContextMatch?.final);
       assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens);
-      assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: '', deleteLeft: 0 });
 
       // The 'wordbreak' transform
       let state = newContextMatch.final;
@@ -345,8 +339,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform));
       assert.isNotNull(newContextMatch?.final);
       assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens);
-      // We want to preserve the added whitespace when predicting a token that follows after it.
-      assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 });
 
       // The 'wordbreak' transform
       let state = newContextMatch.final;
@@ -376,8 +368,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(existingContext, [{sample: transform, p: 1}]);
       assert.isNotNull(newContextMatch?.final);
       assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens);
-      // We want to preserve all text preceding the new token when applying a suggestion.
-      assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: 'd ', deleteLeft: 0});
 
       // The 'wordbreak' transform
       let state = newContextMatch.final;
@@ -401,8 +391,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(existingContext, [{sample: transform, p: 1}]);
       assert.isNotNull(newContextMatch?.final);
       assert.deepEqual(newContextMatch.final.displayTokenization.tokens.map(token => token.exampleInput), rawTokens);
-      // We want to preserve all text preceding the new token when applying a suggestion.
-      assert.deepEqual(newContextMatch.final.displayTokenization.taillessTrueKeystroke, { insert: 'tor ', deleteLeft: 0 });
 
       // The 'wordbreak' transform
       let state = newContextMatch.final;

diff --git a/.../auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/.../auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts
@@ -107,7 +107,7 @@ describe('ContextTokenization', function() {
       const rawTextTokens = ['an', ' ', 'apple', ' ', 'a', ' ', 'day'];
       const tokens = rawTextTokens.map((text => toTransformToken(text)));
 
-      let tokenization = new ContextTokenization(tokens, null, null /* dummy val */);
+      let tokenization = new ContextTokenization(tokens);
 
       assert.deepEqual(tokenization.tokens.map((entry) => entry.exampleInput), rawTextTokens);
       assert.deepEqual(tokenization.tokens.map((entry) => entry.isWhitespace), rawTextTokens.map((entry) => entry == ' '));
@@ -118,7 +118,7 @@ describe('ContextTokenization', function() {
     it('clones', () => {
       const rawTextTokens = ['an', ' ', 'apple', ' ', 'a', ' ', 'day'];
       const tokens = rawTextTokens.map((text => toTransformToken(text)));
-      let baseTokenization = new ContextTokenization(tokens, null, null /* dummy val */);
+      let baseTokenization = new ContextTokenization(tokens);
       let cloned = new ContextTokenization(baseTokenization);
 
       assert.sameOrderedMembers(

diff --git a/...st/auto/headless/engine/predictive-text/worker-thread/context/transition-helpers.tests.ts b/...st/auto/headless/engine/predictive-text/worker-thread/context/transition-helpers.tests.ts
@@ -175,11 +175,7 @@ function generateFixtureForTokenizationOutboundTransition (
 
     // CURRENTLY NOT DONE:  adding new or replacement tokens for text to be placed after 'quotientNodeToExtend'.
 
-    const transitionedTokenization = new ContextTokenization(
-      srcTokenization.tokens.slice(0, srcTokenization.tokens.length - 1 + relativeTailIndex).concat(token),
-      tokenizationEdge,
-      null
-    );
+    const transitionedTokenization = new ContextTokenization(srcTokenization.tokens.slice(0, srcTokenization.tokens.length - 1 + relativeTailIndex).concat(token));
 
     return {
       /**

diff --git a/...ss/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts b/...ss/engine/predictive-text/worker-thread/correction-search/tokenization-corrector.tests.ts
@@ -359,7 +359,7 @@ describe('TokenizationCorrector', () => {
         p: 1
       }
       const therefxyz = new ContextToken(new SubstitutionQuotientSpur(therefxy, [zInput], zInput));
-      const therefxyzTokenization = new ContextTokenization([therefxyz], null, null);
+      const therefxyzTokenization = new ContextTokenization([therefxyz]);
 
       const instance = new TokenizationCorrector(
         therefxyzTokenization,

diff --git a/...ve-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts b/...ve-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts
@@ -107,7 +107,6 @@ describe('determineContextTransition', () => {
       assert.equal(transition.final.context.left, targetContext.left);
       assert.equal(transition.final.context.right ?? "", targetContext.right ?? "");
       assert.sameDeepOrderedMembers(transition.inputDistribution, inputDistribution);
-      assert.isNotOk(transition.final.displayTokenization.taillessTrueKeystroke);
       assert.equal(transition.transitionId, 1);
     } finally {
       warningEmitterSpy.restore();