diff --git a/README.md b/README.md index 4d145bc..2531478 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,9 @@ You can customize behavior from `settings.json` with the `extension.markeditProo "SpelledNumbers": false, "NoOxfordComma": true }, - "disabledLintKinds": ["Regionalism"] + "disabledLintKinds": ["Regionalism"], + "dialect": "Australian", + "dialectFallbacks": ["American"] } } ``` @@ -45,6 +47,8 @@ You can customize behavior from `settings.json` with the `extension.markeditProo - `addToDict`: When `true` (default), clicking "Ignore" on a flagged word also adds it to a personal dictionary so it won't be flagged in future sessions. Set to `false` to disable this behavior. - `lintPreset`: `"strict"` (default), `"standard"`, or `"relaxed"`. - `lintRuleOverrides`: Per-rule overrides (`true` / `false` / `null`) applied on top of the preset. +- `dialect`: The English dialect Harper checks against (default: `"American"`). One of `"American"`, `"British"`, `"Australian"`, `"Canadian"`, or `"Indian"`. Determines which spellings are flagged and which spellings suggestions prefer. +- `dialectFallbacks`: Additional dialects whose spellings are also accepted (default: `[]`). A word flagged as a misspelling by `dialect` is kept only if every fallback dialect also flags it; if any fallback accepts the word, it is not flagged. For example, `"dialect": "Australian"` with `"dialectFallbacks": ["American"]` suggests Australian spellings but does not flag American ones. Invalid names, duplicates, and the primary dialect are ignored. Suggestions always come from the primary `dialect`. - `disabledLintKinds`: Additional lint kinds to filter out. Available kinds: - `Agreement`, `BoundaryError`, `Capitalization`, `Eggcorn`, `Enhancement` - `Formatting`, `Grammar`, `Malapropism`, `Miscellaneous`, `Nonstandard` diff --git a/src/fallback.ts b/src/fallback.ts new file mode 100644 index 0000000..2ed0cfd --- /dev/null +++ b/src/fallback.ts @@ -0,0 +1,19 @@ +// Pure helpers for dialect fallback suppression. Deliberately free of any harper.js import +// so it stays fast to unit test; lint.ts feeds it spans extracted from real Harper lints. + +export interface SpanLike { + start: number; + end: number; +} + +export function spanKey(span: SpanLike): string { + return `${span.start}:${span.end}`; +} + +// Decides whether a primary-dialect Spelling lint should survive fallback filtering. +// A misspelling is kept only when every fallback dialect also flags the same span; if any +// fallback considers the word valid (its span is absent), the word is an accepted spelling +// in that dialect and the lint is dropped. +export function keepSpellingLint(key: string, fallbackSpellingSpans: ReadonlySet[]): boolean { + return fallbackSpellingSpans.every(spans => spans.has(key)); +} diff --git a/src/lint.ts b/src/lint.ts index f5bbe8a..ee9fdb8 100644 --- a/src/lint.ts +++ b/src/lint.ts @@ -1,13 +1,25 @@ -import { LocalLinter, binary, type LintConfig } from 'harper.js'; +import { LocalLinter, binary, Dialect, type LintConfig, type Lint } from 'harper.js'; import { MarkEdit } from 'markedit-api'; -import { getProofreadingSettings } from './settings'; +import { getProofreadingSettings, type DialectName } from './settings'; import { presetDisabledRules } from './rules'; import { presetDisabledKinds } from './kinds'; +import { keepSpellingLint, spanKey } from './fallback'; import { loadWords, saveWords } from './dict'; +const dialectByName: Record = { + American: Dialect.American, + British: Dialect.British, + Australian: Dialect.Australian, + Canadian: Dialect.Canadian, + Indian: Dialect.Indian, +}; + const linter = new LocalLinter({ binary }); const settings = getProofreadingSettings(MarkEdit.userSettings); const disabledKinds = resolveDisabledKinds(); +// One extra linter per fallback dialect; only used to test whether a word is an accepted +// spelling in that dialect. Created only when fallbacks are configured. +const fallbackLinters = settings.dialectFallbacks.map(() => new LocalLinter({ binary })); const linterReady = configureLinter().catch(error => { console.warn('[MarkEdit-proofreading] Failed to configure linter.', error); }); @@ -19,11 +31,38 @@ export async function lint(text: string) { const lints = await linter.lint(text); // Post-filter by kind as a safety net for rules not covered by the static lists - if (disabledKinds.size === 0) { - return lints; + const kept = disabledKinds.size === 0 + ? lints + : lints.filter(lint => !disabledKinds.has(lint.lint_kind())); + + if (fallbackLinters.length === 0) { + return kept; } - return lints.filter(lint => !disabledKinds.has(lint.lint_kind())); + return filterByFallbackDialects(text, kept); +} + +// Drops Spelling lints for words that are valid in a configured fallback dialect, so a primary +// dialect of e.g. Australian still accepts American spellings while suggesting Australian ones. +async function filterByFallbackDialects(text: string, lints: Lint[]): Promise { + const fallbackSpellingSpans = await Promise.all( + fallbackLinters.map(async fallback => { + const fallbackLints = await fallback.lint(text); + return new Set( + fallbackLints + .filter(lint => lint.lint_kind() === 'Spelling') + .map(lint => spanKey(lint.span())), + ); + }), + ); + + return lints.filter(lint => { + if (lint.lint_kind() !== 'Spelling') { + return true; + } + + return keepSpellingLint(spanKey(lint.span()), fallbackSpellingSpans); + }); } export async function resetDictionary(): Promise { @@ -54,6 +93,11 @@ function resolveDisabledKinds(): ReadonlySet { } async function configureLinter() { + await linter.setDialect(dialectByName[settings.dialect]); + await Promise.all( + settings.dialectFallbacks.map((name, index) => fallbackLinters[index].setDialect(dialectByName[name])), + ); + const disabledRules = presetDisabledRules(settings.lintPreset); const hasRuleConfig = disabledRules.length > 0 || diff --git a/src/settings.ts b/src/settings.ts index ceb2369..5427321 100644 --- a/src/settings.ts +++ b/src/settings.ts @@ -5,6 +5,12 @@ const settingsKey = 'extension.markeditProofreading'; export type LintPreset = 'strict' | 'standard' | 'relaxed'; +// Dialects supported by Harper. Kept as strings here so this module stays free of any +// harper.js (WebAssembly) import; lint.ts maps these names to the harper.js `Dialect` enum. +export type DialectName = 'American' | 'British' | 'Australian' | 'Canadian' | 'Indian'; + +const dialectNames: readonly DialectName[] = ['American', 'British', 'Australian', 'Canadian', 'Indian']; + type JSONObject = MarkEdit['userSettings']; type JSONValue = JSONObject[string]; @@ -14,6 +20,8 @@ export interface ProofreadingSettings { lintRuleOverrides: LintConfig; disabledLintKinds: string[]; addToDict: boolean; + dialect: DialectName; + dialectFallbacks: DialectName[]; } export function getProofreadingSettings(userSettings: JSONObject | undefined): ProofreadingSettings { @@ -23,6 +31,8 @@ export function getProofreadingSettings(userSettings: JSONObject | undefined): P lintRuleOverrides: {}, disabledLintKinds: [], addToDict: true, + dialect: 'American', + dialectFallbacks: [], }; const root = asObject(userSettings); @@ -40,8 +50,10 @@ export function getProofreadingSettings(userSettings: JSONObject | undefined): P const disabledLintKinds = parseStringArray(raw.disabledLintKinds); const addToDict = raw.addToDict !== false; + const dialect = parseDialect(raw.dialect); + const dialectFallbacks = parseDialectList(raw.dialectFallbacks, dialect); - return { autoLintDelay, lintPreset, lintRuleOverrides, disabledLintKinds, addToDict }; + return { autoLintDelay, lintPreset, lintRuleOverrides, disabledLintKinds, addToDict, dialect, dialectFallbacks }; } function parseLintPreset(value: JSONValue): LintPreset { @@ -60,6 +72,31 @@ function parseAutoLintDelay(value: JSONValue): number { return 1000; } +function parseDialect(value: JSONValue): DialectName { + return isDialectName(value) ? value : 'American'; +} + +// Parses fallback dialects, dropping invalid names, duplicates, and the primary dialect +// (a fallback to the primary itself would be a no-op). +function parseDialectList(value: JSONValue, primary: DialectName): DialectName[] { + if (!Array.isArray(value)) { + return []; + } + + const fallbacks = new Set(); + for (const item of value) { + if (isDialectName(item) && item !== primary) { + fallbacks.add(item); + } + } + + return [...fallbacks]; +} + +function isDialectName(value: JSONValue): value is DialectName { + return typeof value === 'string' && (dialectNames as readonly string[]).includes(value); +} + function asObject(value: JSONValue | undefined): JSONObject | undefined { if (typeof value !== 'object' || value === null || Array.isArray(value)) { return undefined; diff --git a/tests/fallback.test.ts b/tests/fallback.test.ts new file mode 100644 index 0000000..5b182e0 --- /dev/null +++ b/tests/fallback.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; +import { keepSpellingLint, spanKey } from '../src/fallback'; + +describe('dialect fallback suppression', () => { + it('builds a stable span key', () => { + expect(spanKey({ start: 11, end: 16 })).toBe('11:16'); + }); + + it('keeps a spelling lint when no fallback dialects are configured', () => { + expect(keepSpellingLint('11:16', [])).toBe(true); + }); + + it('drops a spelling that any fallback dialect accepts', () => { + // "color" flagged by Australian primary, but absent from the American fallback set. + const american = new Set(['0:3']); + expect(keepSpellingLint('11:16', [american])).toBe(false); + }); + + it('keeps a genuine misspelling flagged by every fallback dialect', () => { + const american = new Set(['11:16', '25:28']); + const canadian = new Set(['11:16']); + expect(keepSpellingLint('11:16', [american, canadian])).toBe(true); + }); + + it('drops the word unless every fallback flags it', () => { + const american = new Set(['11:16']); + const canadian = new Set(); // Canadian accepts the word + expect(keepSpellingLint('11:16', [american, canadian])).toBe(false); + }); +}); diff --git a/tests/settings.test.ts b/tests/settings.test.ts index 3327fd2..338044d 100644 --- a/tests/settings.test.ts +++ b/tests/settings.test.ts @@ -121,6 +121,58 @@ describe('proofreading settings', () => { }).autoLintDelay).toBe(1000); }); + it('defaults dialect to American with no fallbacks', () => { + const settings = getProofreadingSettings(undefined); + + expect(settings.dialect).toBe('American'); + expect(settings.dialectFallbacks).toEqual([]); + }); + + it('parses a dialect and fallbacks from user settings', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'Australian', + dialectFallbacks: ['American'], + }, + }); + + expect(settings.dialect).toBe('Australian'); + expect(settings.dialectFallbacks).toEqual(['American']); + }); + + it('falls back to American for an unrecognized dialect', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'Klingon', + }, + }); + + expect(settings.dialect).toBe('American'); + }); + + it('drops invalid, duplicate, and primary entries from dialectFallbacks', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'British', + dialectFallbacks: ['American', 'Klingon', 'American', 'British', 42, 'Australian'], + }, + }); + + expect(settings.dialect).toBe('British'); + expect(settings.dialectFallbacks).toEqual(['American', 'Australian']); + }); + + it('ignores dialectFallbacks when it is not an array', () => { + const settings = getProofreadingSettings({ + 'extension.markeditProofreading': { + dialect: 'Australian', + dialectFallbacks: 'American', + }, + }); + + expect(settings.dialectFallbacks).toEqual([]); + }); + it('defaults addToDict to true and allows disabling', () => { expect(getProofreadingSettings(undefined).addToDict).toBe(true);