Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ You can customize behavior from `settings.json` with the `extension.markeditProo
"SpelledNumbers": false,
"NoOxfordComma": true
},
"disabledLintKinds": ["Regionalism"]
"disabledLintKinds": ["Regionalism"],
"dialect": "Australian",
"dialectFallbacks": ["American"]
}
}
```
Expand All @@ -45,6 +47,8 @@ You can customize behavior from `settings.json` with the `extension.markeditProo
- `addToDict`: When `true` (default), clicking "Ignore" on a flagged word also adds it to a personal dictionary so it won't be flagged in future sessions. Set to `false` to disable this behavior.
- `lintPreset`: `"strict"` (default), `"standard"`, or `"relaxed"`.
- `lintRuleOverrides`: Per-rule overrides (`true` / `false` / `null`) applied on top of the preset.
- `dialect`: The English dialect Harper checks against (default: `"American"`). One of `"American"`, `"British"`, `"Australian"`, `"Canadian"`, or `"Indian"`. Determines which spellings are flagged and which spellings suggestions prefer.
- `dialectFallbacks`: Additional dialects whose spellings are also accepted (default: `[]`). A spelling flagged by `dialect` is reported only if every fallback dialect also flags it; if any fallback accepts the word, it is not flagged. For example, `"dialect": "Australian"` with `"dialectFallbacks": ["American"]` suggests Australian spellings but does not flag American ones. Invalid names, duplicates, and the primary dialect itself are ignored. Suggestions always come from the primary `dialect`.
- `disabledLintKinds`: Additional lint kinds to filter out. Available kinds:
- `Agreement`, `BoundaryError`, `Capitalization`, `Eggcorn`, `Enhancement`
- `Formatting`, `Grammar`, `Malapropism`, `Miscellaneous`, `Nonstandard`
Expand Down
19 changes: 19 additions & 0 deletions src/fallback.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Pure helpers for dialect fallback suppression. Deliberately free of any harper.js import
// so it stays fast to unit test; lint.ts feeds it spans extracted from real Harper lints.

/** Minimal structural view of a lint span: just its two numeric offsets. */
export interface SpanLike {
  start: number;
  end: number;
}

/**
 * Serialises a span into a `start:end` string so spans can be compared by value
 * (for example as members of a `Set<string>`).
 */
export function spanKey(span: SpanLike): string {
  const { start, end } = span;
  return String(start) + ':' + String(end);
}

/**
 * Tells whether a Spelling lint raised by the primary dialect should be reported.
 *
 * @param key Span key of the primary-dialect lint.
 * @param fallbackSpellingSpans One set per fallback dialect, holding the span keys that
 *   dialect flagged as misspellings.
 * @returns `true` when every fallback set contains `key` (including when there are no
 *   fallbacks at all); `false` as soon as one fallback does not flag the span, since that
 *   dialect accepts the word.
 */
export function keepSpellingLint(key: string, fallbackSpellingSpans: ReadonlySet<string>[]): boolean {
  for (const flaggedByFallback of fallbackSpellingSpans) {
    if (!flaggedByFallback.has(key)) {
      // This fallback dialect did not flag the span, so the spelling is accepted there.
      return false;
    }
  }

  return true;
}
54 changes: 49 additions & 5 deletions src/lint.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
import { LocalLinter, binary, type LintConfig } from 'harper.js';
import { LocalLinter, binary, Dialect, type LintConfig, type Lint } from 'harper.js';
import { MarkEdit } from 'markedit-api';
import { getProofreadingSettings } from './settings';
import { getProofreadingSettings, type DialectName } from './settings';
import { presetDisabledRules } from './rules';
import { presetDisabledKinds } from './kinds';
import { keepSpellingLint, spanKey } from './fallback';
import { loadWords, saveWords } from './dict';

// Lookup table from the string dialect names used in settings to the harper.js `Dialect`
// values passed to `setDialect`. Typed as `Record<DialectName, Dialect>` so a dialect name
// without an entry here is a compile-time error.
const dialectByName: Record<DialectName, Dialect> = {
American: Dialect.American,
British: Dialect.British,
Australian: Dialect.Australian,
Canadian: Dialect.Canadian,
Indian: Dialect.Indian,
};

// Module-level state, created once when this module is first imported.
// Primary linter; its lints are the ones reported to the caller.
const linter = new LocalLinter({ binary });
// Settings are read a single time here from MarkEdit's user settings.
const settings = getProofreadingSettings(MarkEdit.userSettings);
const disabledKinds = resolveDisabledKinds();
// One extra linter per fallback dialect; only used to test whether a word is an accepted
// spelling in that dialect. Created only when fallbacks are configured.
// The array is index-aligned with `settings.dialectFallbacks`.
const fallbackLinters = settings.dialectFallbacks.map(() => new LocalLinter({ binary }));
// Configuration is kicked off immediately; a failure is logged as a warning and swallowed
// rather than rethrown, so this promise does not reject on configuration errors.
const linterReady = configureLinter().catch(error => {
console.warn('[MarkEdit-proofreading] Failed to configure linter.', error);
});
Expand All @@ -19,11 +31,38 @@ export async function lint(text: string) {
const lints = await linter.lint(text);

// Post-filter by kind as a safety net for rules not covered by the static lists
if (disabledKinds.size === 0) {
return lints;
const kept = disabledKinds.size === 0
? lints
: lints.filter(lint => !disabledKinds.has(lint.lint_kind()));

if (fallbackLinters.length === 0) {
return kept;
}

return lints.filter(lint => !disabledKinds.has(lint.lint_kind()));
return filterByFallbackDialects(text, kept);
}

// Removes Spelling lints whose word is accepted by at least one configured fallback dialect.
// With a primary dialect of e.g. Australian and an American fallback, American spellings are
// therefore not reported, while lints of every other kind pass through untouched.
async function filterByFallbackDialects(text: string, lints: Lint[]): Promise<Lint[]> {
  // Lint the same text with each fallback linter (all started together) and record, per
  // fallback, the span keys it reports as Spelling problems.
  const pendingSpanSets = fallbackLinters.map(async fallback => {
    const flagged = new Set<string>();
    for (const candidate of await fallback.lint(text)) {
      if (candidate.lint_kind() === 'Spelling') {
        flagged.add(spanKey(candidate.span()));
      }
    }
    return flagged;
  });
  const fallbackSpellingSpans = await Promise.all(pendingSpanSets);

  // Only Spelling lints are subject to the fallback check.
  return lints.filter(
    lint => lint.lint_kind() !== 'Spelling' || keepSpellingLint(spanKey(lint.span()), fallbackSpellingSpans),
  );
}

export async function resetDictionary(): Promise<void> {
Expand Down Expand Up @@ -54,6 +93,11 @@ function resolveDisabledKinds(): ReadonlySet<string> {
}

async function configureLinter() {
await linter.setDialect(dialectByName[settings.dialect]);
await Promise.all(
settings.dialectFallbacks.map((name, index) => fallbackLinters[index].setDialect(dialectByName[name])),
);

const disabledRules = presetDisabledRules(settings.lintPreset);
const hasRuleConfig =
disabledRules.length > 0 ||
Expand Down
39 changes: 38 additions & 1 deletion src/settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ const settingsKey = 'extension.markeditProofreading';

// Names of the available lint presets; `parseLintPreset` always yields one of these.
export type LintPreset = 'strict' | 'standard' | 'relaxed';

// Dialects supported by Harper. Kept as strings here so this module stays free of any
// harper.js (WebAssembly) import; lint.ts maps these names to the harper.js `Dialect` enum.
//
// This list is the single source of truth: `DialectName` is derived from it, so the
// runtime validation list and the compile-time union cannot drift apart when a dialect
// is added or removed.
const dialectNames = ['American', 'British', 'Australian', 'Canadian', 'Indian'] as const;

// Union of the literal names in `dialectNames`.
export type DialectName = (typeof dialectNames)[number];

// Type of MarkEdit's `userSettings` object, and the type of any single value stored in it.
type JSONObject = MarkEdit['userSettings'];
type JSONValue = JSONObject[string];

Expand All @@ -14,6 +20,8 @@ export interface ProofreadingSettings {
lintRuleOverrides: LintConfig;
disabledLintKinds: string[];
addToDict: boolean;
dialect: DialectName;
dialectFallbacks: DialectName[];
}

export function getProofreadingSettings(userSettings: JSONObject | undefined): ProofreadingSettings {
Expand All @@ -23,6 +31,8 @@ export function getProofreadingSettings(userSettings: JSONObject | undefined): P
lintRuleOverrides: {},
disabledLintKinds: [],
addToDict: true,
dialect: 'American',
dialectFallbacks: [],
};

const root = asObject(userSettings);
Expand All @@ -40,8 +50,10 @@ export function getProofreadingSettings(userSettings: JSONObject | undefined): P

const disabledLintKinds = parseStringArray(raw.disabledLintKinds);
const addToDict = raw.addToDict !== false;
const dialect = parseDialect(raw.dialect);
const dialectFallbacks = parseDialectList(raw.dialectFallbacks, dialect);

return { autoLintDelay, lintPreset, lintRuleOverrides, disabledLintKinds, addToDict };
return { autoLintDelay, lintPreset, lintRuleOverrides, disabledLintKinds, addToDict, dialect, dialectFallbacks };
}

function parseLintPreset(value: JSONValue): LintPreset {
Expand All @@ -60,6 +72,31 @@ function parseAutoLintDelay(value: JSONValue): number {
return 1000;
}

// Resolves the primary dialect setting: a recognised dialect name is returned as-is,
// anything else yields the default, 'American'.
function parseDialect(value: JSONValue): DialectName {
  if (isDialectName(value)) {
    return value;
  }

  return 'American';
}

// Reads the fallback dialect list. A non-array value yields no fallbacks. Within an array,
// entries that are not recognised dialect names, repeated names, and the primary dialect
// (falling back to itself would change nothing) are discarded; first-seen order is kept.
function parseDialectList(value: JSONValue, primary: DialectName): DialectName[] {
  if (!Array.isArray(value)) {
    return [];
  }

  // A Set removes duplicates while preserving insertion order.
  const accepted = new Set<DialectName>();
  value.forEach(entry => {
    if (!isDialectName(entry) || entry === primary) {
      return;
    }
    accepted.add(entry);
  });

  return Array.from(accepted);
}

// Type guard: true only for strings that appear in `dialectNames`.
function isDialectName(value: JSONValue): value is DialectName {
  if (typeof value !== 'string') {
    return false;
  }

  // Widen to plain strings so an arbitrary string can be looked up.
  const knownNames: readonly string[] = dialectNames;
  return knownNames.includes(value);
}

function asObject(value: JSONValue | undefined): JSONObject | undefined {
if (typeof value !== 'object' || value === null || Array.isArray(value)) {
return undefined;
Expand Down
30 changes: 30 additions & 0 deletions tests/fallback.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { describe, expect, it } from 'vitest';
import { keepSpellingLint, spanKey } from '../src/fallback';

describe('dialect fallback suppression', () => {
  // Span key of the primary-dialect lint under test in the keepSpellingLint cases.
  const flaggedKey = '11:16';

  it('builds a stable span key', () => {
    const key = spanKey({ start: 11, end: 16 });
    expect(key).toBe('11:16');
  });

  it('keeps a spelling lint when no fallback dialects are configured', () => {
    const kept = keepSpellingLint(flaggedKey, []);
    expect(kept).toBe(true);
  });

  it('drops a spelling that any fallback dialect accepts', () => {
    // The American fallback flags a different span only, so it accepts the word at 11:16
    // (e.g. "color" flagged by an Australian primary).
    const americanSpans = new Set(['0:3']);
    const kept = keepSpellingLint(flaggedKey, [americanSpans]);
    expect(kept).toBe(false);
  });

  it('keeps a genuine misspelling flagged by every fallback dialect', () => {
    const americanSpans = new Set(['11:16', '25:28']);
    const canadianSpans = new Set(['11:16']);
    const kept = keepSpellingLint(flaggedKey, [americanSpans, canadianSpans]);
    expect(kept).toBe(true);
  });

  it('drops the word unless every fallback flags it', () => {
    const americanSpans = new Set(['11:16']);
    // An empty set means Canadian flagged nothing, i.e. it accepts the word.
    const canadianSpans = new Set<string>();
    const kept = keepSpellingLint(flaggedKey, [americanSpans, canadianSpans]);
    expect(kept).toBe(false);
  });
});
52 changes: 52 additions & 0 deletions tests/settings.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,58 @@ describe('proofreading settings', () => {
}).autoLintDelay).toBe(1000);
});

it('defaults dialect to American with no fallbacks', () => {
  const { dialect, dialectFallbacks } = getProofreadingSettings(undefined);

  expect(dialect).toBe('American');
  expect(dialectFallbacks).toEqual([]);
});

it('parses a dialect and fallbacks from user settings', () => {
  const userSettings = {
    'extension.markeditProofreading': {
      dialect: 'Australian',
      dialectFallbacks: ['American'],
    },
  };

  const { dialect, dialectFallbacks } = getProofreadingSettings(userSettings);

  expect(dialect).toBe('Australian');
  expect(dialectFallbacks).toEqual(['American']);
});

it('falls back to American for an unrecognized dialect', () => {
  const userSettings = {
    'extension.markeditProofreading': {
      dialect: 'Klingon',
    },
  };

  const { dialect } = getProofreadingSettings(userSettings);

  expect(dialect).toBe('American');
});

it('drops invalid, duplicate, and primary entries from dialectFallbacks', () => {
  // Mixes an unknown name, a repeated name, the primary dialect and a non-string entry.
  const userSettings = {
    'extension.markeditProofreading': {
      dialect: 'British',
      dialectFallbacks: ['American', 'Klingon', 'American', 'British', 42, 'Australian'],
    },
  };

  const { dialect, dialectFallbacks } = getProofreadingSettings(userSettings);

  expect(dialect).toBe('British');
  expect(dialectFallbacks).toEqual(['American', 'Australian']);
});

it('ignores dialectFallbacks when it is not an array', () => {
  const userSettings = {
    'extension.markeditProofreading': {
      dialect: 'Australian',
      dialectFallbacks: 'American',
    },
  };

  const { dialectFallbacks } = getProofreadingSettings(userSettings);

  expect(dialectFallbacks).toEqual([]);
});

it('defaults addToDict to true and allows disabling', () => {
expect(getProofreadingSettings(undefined).addToDict).toBe(true);

Expand Down