From 2727871fc9369350a820753c05cead0ee022fee8 Mon Sep 17 00:00:00 2001 From: Sam McLeod Date: Wed, 20 May 2026 10:18:11 +1000 Subject: [PATCH] Add dialect setting with optional fallback dialects Harper can lint against different English dialects, but the extension always used the default (American), so non-American spellings like "colour" and "organisation" were flagged as misspellings with no way to change it. Add a `dialect` setting accepting "American" (default), "British", "Australian", "Canadian", or "Indian". It is passed to Harper via `setDialect`, so it controls both which spellings are flagged and which spellings suggestions prefer. Add a `dialectFallbacks` setting listing additional dialects whose spellings are also accepted. A Spelling lint from the primary dialect is kept only if every fallback dialect also flags the same span; if any fallback accepts the word it is not flagged. This lets you, for example, prefer Australian suggestions while still accepting American spellings. Suggestions always come from the primary dialect. Implementation: - Dialect names are parsed and validated in settings.ts as plain strings, keeping that module free of the harper.js (WebAssembly) import so it stays fast to unit test. Invalid names, duplicates, and the primary dialect are dropped from the fallback list. - lint.ts maps the names to Harper's Dialect enum, sets the dialect on the linter, and creates one extra LocalLinter per fallback dialect used only to test whether a word is an accepted spelling there. - Fallback filtering runs one extra lint pass per fallback dialect and is skipped entirely when none are configured. The keep/drop decision lives in a small pure helper (fallback.ts) so it can be unit tested without loading WebAssembly. Defaults preserve the previous behaviour (American, no fallbacks). README documents both settings; new unit tests cover settings parsing and the fallback decision. 
--- README.md | 6 ++++- src/fallback.ts | 19 +++++++++++++++ src/lint.ts | 54 ++++++++++++++++++++++++++++++++++++++---- src/settings.ts | 39 +++++++++++++++++++++++++++++- tests/fallback.test.ts | 30 +++++++++++++++++++++++ tests/settings.test.ts | 52 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 193 insertions(+), 7 deletions(-) create mode 100644 src/fallback.ts create mode 100644 tests/fallback.test.ts diff --git a/README.md b/README.md index 4d145bc..2531478 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,9 @@ You can customize behavior from `settings.json` with the `extension.markeditProo "SpelledNumbers": false, "NoOxfordComma": true }, - "disabledLintKinds": ["Regionalism"] + "disabledLintKinds": ["Regionalism"], + "dialect": "Australian", + "dialectFallbacks": ["American"] } } ``` @@ -45,6 +47,8 @@ You can customize behavior from `settings.json` with the `extension.markeditProo - `addToDict`: When `true` (default), clicking "Ignore" on a flagged word also adds it to a personal dictionary so it won't be flagged in future sessions. Set to `false` to disable this behavior. - `lintPreset`: `"strict"` (default), `"standard"`, or `"relaxed"`. - `lintRuleOverrides`: Per-rule overrides (`true` / `false` / `null`) applied on top of the preset. +- `dialect`: The English dialect Harper checks against (default: `"American"`). One of `"American"`, `"British"`, `"Australian"`, `"Canadian"`, or `"Indian"`. Determines which spellings are flagged and which spellings suggestions prefer. +- `dialectFallbacks`: Additional dialects whose spellings are also accepted (default: `[]`). A word flagged as a misspelling by `dialect` is kept only if every fallback dialect also flags it; if any fallback accepts the word, it is not flagged. For example, `"dialect": "Australian"` with `"dialectFallbacks": ["American"]` suggests Australian spellings but does not flag American ones. Invalid names, duplicates, and the primary dialect are ignored. 
Suggestions always come from the primary `dialect`. - `disabledLintKinds`: Additional lint kinds to filter out. Available kinds: - `Agreement`, `BoundaryError`, `Capitalization`, `Eggcorn`, `Enhancement` - `Formatting`, `Grammar`, `Malapropism`, `Miscellaneous`, `Nonstandard` diff --git a/src/fallback.ts b/src/fallback.ts new file mode 100644 index 0000000..2ed0cfd --- /dev/null +++ b/src/fallback.ts @@ -0,0 +1,19 @@ +// Pure helpers for dialect fallback suppression. Deliberately free of any harper.js import +// so it stays fast to unit test; lint.ts feeds it spans extracted from real Harper lints. + +export interface SpanLike { + start: number; + end: number; +} + +export function spanKey(span: SpanLike): string { + return `${span.start}:${span.end}`; +} + +// Decides whether a primary-dialect Spelling lint should survive fallback filtering. +// A misspelling is kept only when every fallback dialect also flags the same span; if any +// fallback considers the word valid (its span is absent), the word is an accepted spelling +// in that dialect and the lint is dropped. 
+export function keepSpellingLint(key: string, fallbackSpellingSpans: ReadonlySet<string>[]): boolean { + return fallbackSpellingSpans.every(spans => spans.has(key)); +} diff --git a/src/lint.ts b/src/lint.ts index f5bbe8a..ee9fdb8 100644 --- a/src/lint.ts +++ b/src/lint.ts @@ -1,13 +1,25 @@ -import { LocalLinter, binary, type LintConfig } from 'harper.js'; +import { LocalLinter, binary, Dialect, type LintConfig, type Lint } from 'harper.js'; import { MarkEdit } from 'markedit-api'; -import { getProofreadingSettings } from './settings'; +import { getProofreadingSettings, type DialectName } from './settings'; import { presetDisabledRules } from './rules'; import { presetDisabledKinds } from './kinds'; +import { keepSpellingLint, spanKey } from './fallback'; import { loadWords, saveWords } from './dict'; +const dialectByName: Record<DialectName, Dialect> = { + American: Dialect.American, + British: Dialect.British, + Australian: Dialect.Australian, + Canadian: Dialect.Canadian, + Indian: Dialect.Indian, +}; + const linter = new LocalLinter({ binary }); const settings = getProofreadingSettings(MarkEdit.userSettings); const disabledKinds = resolveDisabledKinds(); +// One extra linter per fallback dialect; only used to test whether a word is an accepted +// spelling in that dialect. Created only when fallbacks are configured. +const fallbackLinters = settings.dialectFallbacks.map(() => new LocalLinter({ binary })); const linterReady = configureLinter().catch(error => { console.warn('[MarkEdit-proofreading] Failed to configure linter.', error); }); @@ -19,11 +31,38 @@ export async function lint(text: string) { const lints = await linter.lint(text); // Post-filter by kind as a safety net for rules not covered by the static lists - if (disabledKinds.size === 0) { - return lints; + const kept = disabledKinds.size === 0 + ? 
lints + : lints.filter(lint => !disabledKinds.has(lint.lint_kind())); + + if (fallbackLinters.length === 0) { + return kept; } - return lints.filter(lint => !disabledKinds.has(lint.lint_kind())); + return filterByFallbackDialects(text, kept); +} + +// Drops Spelling lints for words that are valid in a configured fallback dialect, so a primary +// dialect of e.g. Australian still accepts American spellings while suggesting Australian ones. +async function filterByFallbackDialects(text: string, lints: Lint[]): Promise<Lint[]> { + const fallbackSpellingSpans = await Promise.all( + fallbackLinters.map(async fallback => { + const fallbackLints = await fallback.lint(text); + return new Set( + fallbackLints + .filter(lint => lint.lint_kind() === 'Spelling') + .map(lint => spanKey(lint.span())), + ); + }), + ); + + return lints.filter(lint => { + if (lint.lint_kind() !== 'Spelling') { + return true; + } + + return keepSpellingLint(spanKey(lint.span()), fallbackSpellingSpans); + }); } export async function resetDictionary(): Promise { @@ -54,6 +93,11 @@ function resolveDisabledKinds(): ReadonlySet { } async function configureLinter() { + await linter.setDialect(dialectByName[settings.dialect]); + await Promise.all( + settings.dialectFallbacks.map((name, index) => fallbackLinters[index].setDialect(dialectByName[name])), + ); + const disabledRules = presetDisabledRules(settings.lintPreset); const hasRuleConfig = disabledRules.length > 0 || diff --git a/src/settings.ts b/src/settings.ts index ceb2369..5427321 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -5,6 +5,12 @@ const settingsKey = 'extension.markeditProofreading'; export type LintPreset = 'strict' | 'standard' | 'relaxed'; +// Dialects supported by Harper. Kept as strings here so this module stays free of any +// harper.js (WebAssembly) import; lint.ts maps these names to the harper.js `Dialect` enum. 
+export type DialectName = 'American' | 'British' | 'Australian' | 'Canadian' | 'Indian'; + +const dialectNames: readonly DialectName[] = ['American', 'British', 'Australian', 'Canadian', 'Indian']; + type JSONObject = MarkEdit['userSettings']; type JSONValue = JSONObject[string]; @@ -14,6 +20,8 @@ export interface ProofreadingSettings { lintRuleOverrides: LintConfig; disabledLintKinds: string[]; addToDict: boolean; + dialect: DialectName; + dialectFallbacks: DialectName[]; } export function getProofreadingSettings(userSettings: JSONObject | undefined): ProofreadingSettings { @@ -23,6 +31,8 @@ export function getProofreadingSettings(userSettings: JSONObject | undefined): P lintRuleOverrides: {}, disabledLintKinds: [], addToDict: true, + dialect: 'American', + dialectFallbacks: [], }; const root = asObject(userSettings); @@ -40,8 +50,10 @@ export function getProofreadingSettings(userSettings: JSONObject | undefined): P const disabledLintKinds = parseStringArray(raw.disabledLintKinds); const addToDict = raw.addToDict !== false; + const dialect = parseDialect(raw.dialect); + const dialectFallbacks = parseDialectList(raw.dialectFallbacks, dialect); - return { autoLintDelay, lintPreset, lintRuleOverrides, disabledLintKinds, addToDict }; + return { autoLintDelay, lintPreset, lintRuleOverrides, disabledLintKinds, addToDict, dialect, dialectFallbacks }; } function parseLintPreset(value: JSONValue): LintPreset { @@ -60,6 +72,31 @@ function parseAutoLintDelay(value: JSONValue): number { return 1000; } +function parseDialect(value: JSONValue): DialectName { + return isDialectName(value) ? value : 'American'; +} + +// Parses fallback dialects, dropping invalid names, duplicates, and the primary dialect +// (a fallback to the primary itself would be a no-op). 
+function parseDialectList(value: JSONValue, primary: DialectName): DialectName[] { + if (!Array.isArray(value)) { + return []; + } + + const fallbacks = new Set<DialectName>(); + for (const item of value) { + if (isDialectName(item) && item !== primary) { + fallbacks.add(item); + } + } + + return [...fallbacks]; +} + +function isDialectName(value: JSONValue): value is DialectName { + return typeof value === 'string' && (dialectNames as readonly string[]).includes(value); +} + function asObject(value: JSONValue | undefined): JSONObject | undefined { if (typeof value !== 'object' || value === null || Array.isArray(value)) { return undefined; diff --git a/tests/fallback.test.ts b/tests/fallback.test.ts new file mode 100644 index 0000000..5b182e0 --- /dev/null +++ b/tests/fallback.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; +import { keepSpellingLint, spanKey } from '../src/fallback'; + +describe('dialect fallback suppression', () => { + it('builds a stable span key', () => { + expect(spanKey({ start: 11, end: 16 })).toBe('11:16'); + }); + + it('keeps a spelling lint when no fallback dialects are configured', () => { + expect(keepSpellingLint('11:16', [])).toBe(true); + }); + + it('drops a spelling that any fallback dialect accepts', () => { + // "color" flagged by Australian primary, but absent from the American fallback set. 
+ const american = new Set(['0:3']); + expect(keepSpellingLint('11:16', [american])).toBe(false); + }); + + it('keeps a genuine misspelling flagged by every fallback dialect', () => { + const american = new Set(['11:16', '25:28']); + const canadian = new Set(['11:16']); + expect(keepSpellingLint('11:16', [american, canadian])).toBe(true); + }); + + it('drops the word unless every fallback flags it', () => { + const american = new Set(['11:16']); + const canadian = new Set<string>(); // Canadian accepts the word + expect(keepSpellingLint('11:16', [american, canadian])).toBe(false); + }); +}); diff --git a/tests/settings.test.ts b/tests/settings.test.ts index 3327fd2..338044d 100644 --- a/tests/settings.test.ts +++ b/tests/settings.test.ts @@ -121,6 +121,58 @@ describe('proofreading settings', () => { }).autoLintDelay).toBe(1000); }); + it('defaults dialect to American with no fallbacks', () => { + const settings = getProofreadingSettings(undefined); + + expect(settings.dialect).toBe('American'); + expect(settings.dialectFallbacks).toEqual([]); + }); + + it('parses a dialect and fallbacks from user settings', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'Australian', + dialectFallbacks: ['American'], + }, + }); + + expect(settings.dialect).toBe('Australian'); + expect(settings.dialectFallbacks).toEqual(['American']); + }); + + it('falls back to American for an unrecognized dialect', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'Klingon', + }, + }); + + expect(settings.dialect).toBe('American'); + }); + + it('drops invalid, duplicate, and primary entries from dialectFallbacks', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'British', + dialectFallbacks: ['American', 'Klingon', 'American', 'British', 42, 'Australian'], + }, + }); + + expect(settings.dialect).toBe('British'); + 
expect(settings.dialectFallbacks).toEqual(['American', 'Australian']); + }); + + it('ignores dialectFallbacks when it is not an array', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'Australian', + dialectFallbacks: 'American', + }, + }); + + expect(settings.dialectFallbacks).toEqual([]); + }); + it('defaults addToDict to true and allows disabling', () => { expect(getProofreadingSettings(undefined).addToDict).toBe(true);