diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 00000000..ddce69b6 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +dist/ +.astro/ diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs new file mode 100644 index 00000000..5ffe816e --- /dev/null +++ b/docs/astro.config.mjs @@ -0,0 +1,63 @@ +// @ts-check +import { defineConfig } from "astro/config"; +import starlight from "@astrojs/starlight"; + +// https://astro.build/config +export default defineConfig({ + integrations: [ + starlight({ + title: "ODict", + description: + "The lightning-fast open-source dictionary file format for human languages", + social: [ + { + icon: "github", + label: "GitHub", + href: "https://github.com/TheOpenDictionary/odict", + }, + ], + editLink: { + baseUrl: + "https://github.com/TheOpenDictionary/odict/edit/main/docs/", + }, + sidebar: [ + { + label: "Getting Started", + items: [ + { label: "Introduction", slug: "getting-started/introduction" }, + { label: "Installation", slug: "getting-started/installation" }, + { label: "Quick Start", slug: "getting-started/quickstart" }, + ], + }, + { + label: "XML Schema", + items: [ + { label: "Overview", slug: "schema/overview" }, + { label: "Reference", slug: "schema/reference" }, + ], + }, + { + label: "Guides", + items: [ + { label: "Compiling Dictionaries", slug: "guides/compiling" }, + { label: "Looking Up Entries", slug: "guides/lookup" }, + { label: "Searching Dictionaries", slug: "guides/search" }, + { label: "Tokenizing Text", slug: "guides/tokenize" }, + ], + }, + { + label: "CLI", + items: [{ label: "Command Reference", slug: "cli/reference" }], + }, + { + label: "API Reference", + items: [ + { label: "Rust", slug: "api/rust" }, + { label: "Python", slug: "api/python" }, + { label: "JavaScript", slug: "api/javascript" }, + ], + }, + ], + }), + ], +}); diff --git a/docs/package.json b/docs/package.json new file mode 100644 index 00000000..e4eba79e --- /dev/null +++ b/docs/package.json @@ 
-0,0 +1,19 @@ +{ + "name": "@odict/docs", + "type": "module", + "version": "0.0.1", + "private": true, + "scripts": { + "dev": "npm run generate && astro dev", + "start": "npm run generate && astro dev", + "build": "npm run generate && astro build", + "preview": "astro preview", + "astro": "astro", + "generate": "node scripts/generate-schema-docs.mjs && node scripts/generate-cli-docs.mjs" + }, + "dependencies": { + "@astrojs/starlight": "^0.32.0", + "astro": "^5.3.0", + "sharp": "^0.33.0" + } +} diff --git a/docs/python-api-generated.md b/docs/python-api-generated.md new file mode 100644 index 00000000..93d0c99c --- /dev/null +++ b/docs/python-api-generated.md @@ -0,0 +1,450 @@ +# Python API + +*Auto-generated from Rust doc comments.* + +--- + +## Functions + +### `compile()` + +Compiles an ODXML string into binary `.odict` data. + +Takes an XML string conforming to the ODict XML schema and returns +the compiled binary representation as a byte vector. The resulting +bytes can be passed to [`OpenDictionary::new`] or saved to disk. + +# Errors + +Returns an error if the XML is malformed or does not conform to the +ODict schema. + +## `OpenDictionary` + +The main class for working with compiled ODict dictionaries. + +An `OpenDictionary` wraps a compiled binary dictionary and provides +methods for looking up terms, full-text search, tokenization, and more. + +# Construction + +Create from compiled bytes or an XML string using [`OpenDictionary::new`], +or load from a file path or remote registry using [`OpenDictionary::load`]. + +### Methods + +#### `load()` + +Loads a dictionary from a file path, alias, or remote identifier. + +This is an async method. If `dictionary` is a path to a `.odict` file, +it loads from disk. If it matches the format `org/lang` (e.g. `wiktionary/eng`), +it downloads from the remote registry. + +#### `new()` + +Creates a dictionary from compiled binary data or directly from an XML string. 
+ +Accepts either `bytes` (as returned by [`compile`]) or a `str` containing +ODXML markup. + +#### `save()` + +Saves the dictionary to disk as a `.odict` file. + +Optionally configure Brotli compression via `quality` (0–11) and +`window_size` (0–22). + +#### `min_rank()` + +The minimum rank value across all entries, or `None` if no entries have ranks. + +#### `max_rank()` + +The maximum rank value across all entries, or `None` if no entries have ranks. + +#### `lookup()` + +Looks up one or more terms by exact match. + +- `query` — a single term or list of terms to look up. +- `split` — minimum word length for compound splitting. +- `follow` — follow `see_also` cross-references until an entry with etymologies is found. +- `insensitive` — enable case-insensitive matching. + +#### `lexicon()` + +Returns all terms defined in the dictionary, sorted alphabetically. + +#### `index()` + +Creates a full-text search index for the dictionary. + +Must be called before [`OpenDictionary::search`]. + +#### `search()` + +Runs a full-text search across the dictionary. + +Requires an index — call [`OpenDictionary::index`] first. + +#### `tokenize()` + +Tokenizes text using NLP-based segmentation and matches each token against the dictionary. + +Supports Chinese, Japanese, Korean, Thai, Khmer, German, Swedish, +and Latin-script languages. + +- `text` — the text to tokenize. +- `follow` — follow `see_also` cross-references. Accepts `True`/`False` or a number (nonzero = follow). +- `insensitive` — enable case-insensitive matching. + +--- + +## `Definition` + +A single definition of a word sense. + +Contains the definition text along with optional examples and notes. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `id` | `str | None` | Optional identifier for this definition. | +| `value` | `str` | The definition text. | +| `examples` | `list[Example]` | Usage examples illustrating this definition. 
| +| `notes` | `list[Note]` | Additional notes about this definition. | + +--- + +## `Entry` + +A dictionary entry representing a single headword and its associated data. + +Each entry contains the term itself, optional ranking metadata, +cross-reference information, etymologies, and media attachments. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `term` | `str` | The headword for this entry. | +| `rank` | `int | None` | Optional frequency rank for ordering entries. | +| `see_also` | `str | None` | Cross-reference target term, if this entry redirects to another. | +| `etymologies` | `list[Etymology]` | The etymologies associated with this entry. | +| `media` | `list[MediaURL]` | Media URLs (audio, images, etc.) associated with this entry. | + +--- + +## `EnumWrapper` + +A wrapper for ODict enumeration values (e.g. part of speech, pronunciation kind). + +ODict enums are represented as string triples: the enum name, +the variant name, and the variant's string value. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `name` | `str` | The enum type name (e.g. `"PartOfSpeech"`). | +| `variant` | `str` | The variant name (e.g. `"Noun"`). | +| `value` | `str` | The string value of the variant (e.g. `"n"`). | + +--- + +## `Etymology` + +An etymology grouping for a dictionary entry. + +Etymologies group together senses that share a common word origin. +Each etymology can have its own pronunciations and description. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `id` | `str | None` | Optional identifier for this etymology. | +| `pronunciations` | `list[Pronunciation]` | Pronunciations associated with this etymology. | +| `description` | `str | None` | Optional description of the word origin. | +| `senses` | `list[Sense]` | The senses (meanings) under this etymology. | + +--- + +## `Example` + +A usage example illustrating a definition. 
+ +Examples can optionally include translations and pronunciations. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `value` | `str` | The example text. | +| `translations` | `list[Translation]` | Translations of this example into other languages. | +| `pronunciations` | `list[Pronunciation]` | Pronunciations for this example. | + +--- + +## `Form` + +An inflected or alternate form of a word. + +Forms represent morphological variants such as plurals, conjugations, +or other inflections. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `term` | `str` | The inflected form text. | +| `kind` | `EnumWrapper | None` | The kind of form (e.g. plural, past tense), or `None`. | +| `tags` | `list[str]` | Tags for categorizing this form. | + +--- + +## `Group` + +A named group of related definitions. + +Groups allow organizing multiple definitions under a shared description, +such as grouping definitions by semantic domain. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `id` | `str | None` | Optional identifier for this group. | +| `description` | `str` | A description of what this group of definitions has in common. | +| `definitions` | `list[Definition]` | The definitions within this group. | + +--- + +## `IndexOptions` + +Options for configuring full-text index creation. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `directory` | `str | None` | Custom directory for storing the index. | +| `memory` | `int | None` | Memory arena size per thread in bytes (must be >15 MB). | +| `overwrite` | `bool | None` | Whether to overwrite an existing index. | + +--- + +## `RemoteLoadOptions` + +Options for loading dictionaries from remote registries. 
+ +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `out_dir` | `str | None` | Custom output directory for downloaded files. | +| `caching` | `bool | None` | Whether to cache downloaded dictionaries locally. | +| `retries` | `int | None` | Number of download retries on failure. | + +--- + +## `LoadOptions` + +Options for loading a dictionary from a file path, alias, or remote registry. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `config_dir` | `str | None` | Custom configuration directory. | +| `remote` | `RemoteLoadOptions | None` | Options for remote dictionary loading. | + +--- + +## `LookupOptions` + +Options for configuring term lookups. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `split` | `int | None` | Minimum word length for compound splitting. | +| `follow` | `bool | None` | Whether to follow `see_also` cross-references. | +| `insensitive` | `bool | None` | Whether to enable case-insensitive matching. | + +--- + +## `LookupResult` + +The result of a dictionary lookup. + +Contains the matched entry and, if a `see_also` redirect was followed, +the original entry that initiated the redirect. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `entry` | `Entry` | The matched dictionary entry. | +| `directed_from` | `Entry | None` | The original entry if a `see_also` redirect was followed, or `None`. | + +--- + +## `MediaURL` + +A reference to an external media resource (audio, image, etc.). + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `src` | `str` | URL or path to the media file. | +| `mime_type` | `str | None` | MIME type (e.g. `audio/mpeg`), or `None`. | +| `description` | `str | None` | Human-readable description of the media. | + +--- + +## `Note` + +An additional note attached to a definition. 
+ +Notes provide supplementary information such as usage guidance, +historical context, or grammatical remarks. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `id` | `str | None` | Optional identifier for this note. | +| `value` | `str` | The note text. | +| `examples` | `list[Example]` | Examples associated with this note. | + +--- + +## `Pronunciation` + +A pronunciation entry for a word or etymology. + +Represents how a word is pronounced in a given notation system +(e.g. IPA, Pinyin), with optional audio media. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `kind` | `EnumWrapper | None` | The pronunciation system (e.g. IPA, Pinyin), or `None`. | +| `value` | `str` | The pronunciation notation string. | +| `media` | `list[MediaURL]` | Audio media URLs for this pronunciation. | + +--- + +## `CompressOptions` + +Brotli compression options for saving dictionaries. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `quality` | `int | None` | Compression quality level (0–11). | +| `window_size` | `int | None` | Compression window size (0–22). | + +--- + +## `SaveOptions` + +Options for saving a dictionary to disk. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `compress` | `CompressOptions | None` | Optional Brotli compression settings. | + +--- + +## `SearchOptions` + +Options for configuring full-text search. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `directory` | `str | None` | Custom directory for the search index. | +| `threshold` | `int | None` | Relevance score threshold for filtering results. | +| `autoindex` | `bool | None` | Whether to automatically create an index if one does not exist. | +| `limit` | `int | None` | Maximum number of results to return. 
| + +--- + +## `Sense` + +A word sense — a specific meaning grouped by part of speech. + +Senses represent distinct meanings of a word under a given etymology. +Each sense has a part of speech and contains definitions (or definition groups), +along with optional tags, translations, and inflected forms. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `pos` | `EnumWrapper` | The part of speech for this sense (e.g. noun, verb, adjective). | +| `lemma` | `str | None` | Optional lemma reference linking to another entry. | +| `definitions` | `list[Definition | Group]` | Definitions or definition groups under this sense. | +| `tags` | `list[str]` | Tags for categorizing or filtering this sense. | +| `translations` | `list[Translation]` | Translations of this sense into other languages. | +| `forms` | `list[Form]` | Inflected forms of the word under this sense. | + +--- + +## `Token` + +A token produced by NLP-based text segmentation. + +Each token represents a segment of the input text, with metadata about +its position, detected language and script, and any matching dictionary entries. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `lemma` | `str` | The original token text (lemma form). | +| `language` | `str | None` | Detected language code (e.g. `"eng"`), or `None` if unknown. | +| `entries` | `list[LookupResult]` | Matched dictionary entries for this token. | +| `kind` | `str` | The token kind (e.g. `"Word"`, `"Punctuation"`). | +| `script` | `str` | Detected script name (e.g. `"Latin"`, `"Han"`). | +| `start` | `int` | Start byte offset in the original text. | +| `end` | `int` | End byte offset in the original text. | + +--- + +## `TokenizeOptions` + +Options for configuring text tokenization. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `follow` | `bool | int | None` | Whether to follow `see_also` cross-references. 
Accepts `True`/`False` or a number (nonzero = follow). | +| `insensitive` | `bool | None` | Whether to enable case-insensitive matching. | + +--- + +## `Translation` + +A translation of a word, definition, or example into another language. + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `lang` | `str` | The BCP-47 language code (e.g. `"fra"`, `"deu"`). | +| `value` | `str` | The translated text. | + +--- diff --git a/docs/rustdoc-json-output.md b/docs/rustdoc-json-output.md new file mode 100644 index 00000000..f230d948 --- /dev/null +++ b/docs/rustdoc-json-output.md @@ -0,0 +1,275 @@ +# ODict Python API (from rustdoc JSON) + +*Generated from rustdoc JSON format v57* + +## `CompressOptions` + +Brotli compression options for saving dictionaries. + +| Field | Type | Description | +|-------|------|-------------| +| `quality` | `?` | Compression quality level (0–11). | +| `window_size` | `?` | Compression window size (0–22). | + +## `Definition` + +A single definition of a word sense. + +Contains the definition text along with optional examples and notes. + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `?` | Optional identifier for this definition. | +| `value` | `?` | The definition text. | +| `examples` | `?` | Usage examples illustrating this definition. | +| `notes` | `?` | Additional notes about this definition. | + +## `Entry` + +A dictionary entry representing a single headword and its associated data. + +Each entry contains the term itself, optional ranking metadata, +cross-reference information, etymologies, and media attachments. + +| Field | Type | Description | +|-------|------|-------------| +| `term` | `?` | The headword for this entry. | +| `rank` | `?` | Optional frequency rank for ordering entries. | +| `see_also` | `?` | Cross-reference target term, if this entry redirects to another. | +| `etymologies` | `?` | The etymologies associated with this entry. 
| +| `media` | `?` | Media URLs (audio, images, etc.) associated with this entry. | + +## `EnumWrapper` + +A wrapper for ODict enumeration values (e.g. part of speech, pronunciation kind). + +ODict enums are represented as string triples: the enum name, +the variant name, and the variant's string value. + +| Field | Type | Description | +|-------|------|-------------| +| `name` | `?` | The enum type name (e.g. `"PartOfSpeech"`). | +| `variant` | `?` | The variant name (e.g. `"Noun"`). | +| `value` | `?` | The string value of the variant (e.g. `"n"`). | + +## `Etymology` + +An etymology grouping for a dictionary entry. + +Etymologies group together senses that share a common word origin. +Each etymology can have its own pronunciations and description. + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `?` | Optional identifier for this etymology. | +| `pronunciations` | `?` | Pronunciations associated with this etymology. | +| `description` | `?` | Optional description of the word origin. | +| `senses` | `?` | The senses (meanings) under this etymology. | + +## `Example` + +A usage example illustrating a definition. + +Examples can optionally include translations and pronunciations. + +| Field | Type | Description | +|-------|------|-------------| +| `value` | `?` | The example text. | +| `translations` | `?` | Translations of this example into other languages. | +| `pronunciations` | `?` | Pronunciations for this example. | + +## `Form` + +An inflected or alternate form of a word. + +Forms represent morphological variants such as plurals, conjugations, +or other inflections. + +| Field | Type | Description | +|-------|------|-------------| +| `term` | `?` | The inflected form text. | +| `kind` | `?` | The kind of form (e.g. plural, past tense), or `None`. | +| `tags` | `?` | Tags for categorizing this form. | + +## `Group` + +A named group of related definitions. 
+ +Groups allow organizing multiple definitions under a shared description, +such as grouping definitions by semantic domain. + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `?` | Optional identifier for this group. | +| `description` | `?` | A description of what this group of definitions has in common. | +| `definitions` | `?` | The definitions within this group. | + +## `IndexOptions` + +Options for configuring full-text index creation. + +| Field | Type | Description | +|-------|------|-------------| +| `directory` | `?` | Custom directory for storing the index. | +| `memory` | `?` | Memory arena size per thread in bytes (must be >15 MB). | +| `overwrite` | `?` | Whether to overwrite an existing index. | + +## `LoadOptions` + +Options for loading a dictionary from a file path, alias, or remote registry. + +| Field | Type | Description | +|-------|------|-------------| +| `config_dir` | `?` | Custom configuration directory. | +| `remote` | `?` | Options for remote dictionary loading. | + +## `LookupOptions` + +Options for configuring term lookups. + +| Field | Type | Description | +|-------|------|-------------| +| `split` | `?` | Minimum word length for compound splitting. | +| `follow` | `?` | Whether to follow `see_also` cross-references. | +| `insensitive` | `?` | Whether to enable case-insensitive matching. | + +## `LookupResult` + +The result of a dictionary lookup. + +Contains the matched entry and, if a `see_also` redirect was followed, +the original entry that initiated the redirect. + +| Field | Type | Description | +|-------|------|-------------| +| `entry` | `?` | The matched dictionary entry. | +| `directed_from` | `?` | The original entry if a `see_also` redirect was followed, or `None`. | + +## `MediaURL` + +A reference to an external media resource (audio, image, etc.). + +| Field | Type | Description | +|-------|------|-------------| +| `src` | `?` | URL or path to the media file. 
| +| `mime_type` | `?` | MIME type (e.g. `audio/mpeg`), or `None`. | +| `description` | `?` | Human-readable description of the media. | + +## `Note` + +An additional note attached to a definition. + +Notes provide supplementary information such as usage guidance, +historical context, or grammatical remarks. + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `?` | Optional identifier for this note. | +| `value` | `?` | The note text. | +| `examples` | `?` | Examples associated with this note. | + +## `OpenDictionary` + +The main class for working with compiled ODict dictionaries. + +An `OpenDictionary` wraps a compiled binary dictionary and provides +methods for looking up terms, full-text search, tokenization, and more. + +# Construction + +Create from compiled bytes or an XML string using [`OpenDictionary::new`], +or load from a file path or remote registry using [`OpenDictionary::load`]. + +## `Pronunciation` + +A pronunciation entry for a word or etymology. + +Represents how a word is pronounced in a given notation system +(e.g. IPA, Pinyin), with optional audio media. + +| Field | Type | Description | +|-------|------|-------------| +| `kind` | `?` | The pronunciation system (e.g. IPA, Pinyin), or `None`. | +| `value` | `?` | The pronunciation notation string. | +| `media` | `?` | Audio media URLs for this pronunciation. | + +## `RemoteLoadOptions` + +Options for loading dictionaries from remote registries. + +| Field | Type | Description | +|-------|------|-------------| +| `out_dir` | `?` | Custom output directory for downloaded files. | +| `caching` | `?` | Whether to cache downloaded dictionaries locally. | +| `retries` | `?` | Number of download retries on failure. | + +## `SaveOptions` + +Options for saving a dictionary to disk. + +| Field | Type | Description | +|-------|------|-------------| +| `compress` | `?` | Optional Brotli compression settings. | + +## `SearchOptions` + +Options for configuring full-text search. 
| + +| Field | Type | Description | +|-------|------|-------------| +| `directory` | `?` | Custom directory for the search index. | +| `threshold` | `?` | Relevance score threshold for filtering results. | +| `autoindex` | `?` | Whether to automatically create an index if one does not exist. | +| `limit` | `?` | Maximum number of results to return. | + +## `Sense` + +A word sense — a specific meaning grouped by part of speech. + +Senses represent distinct meanings of a word under a given etymology. +Each sense has a part of speech and contains definitions (or definition groups), +along with optional tags, translations, and inflected forms. + +| Field | Type | Description | +|-------|------|-------------| +| `pos` | `?` | The part of speech for this sense (e.g. noun, verb, adjective). | +| `lemma` | `?` | Optional lemma reference linking to another entry. | +| `definitions` | `?` | Definitions or definition groups under this sense. | +| `tags` | `?` | Tags for categorizing or filtering this sense. | +| `translations` | `?` | Translations of this sense into other languages. | +| `forms` | `?` | Inflected forms of the word under this sense. | + +## `Token` + +A token produced by NLP-based text segmentation. + +Each token represents a segment of the input text, with metadata about +its position, detected language and script, and any matching dictionary entries. + +| Field | Type | Description | +|-------|------|-------------| +| `lemma` | `?` | The original token text (lemma form). | +| `language` | `?` | Detected language code (e.g. `"eng"`), or `None` if unknown. | +| `entries` | `?` | Matched dictionary entries for this token. | +| `kind` | `?` | The token kind (e.g. `"Word"`, `"Punctuation"`). | +| `script` | `?` | Detected script name (e.g. `"Latin"`, `"Han"`). | +| `start` | `usize` | Start byte offset in the original text. | +| `end` | `usize` | End byte offset in the original text. | + +## `TokenizeOptions` + +Options for configuring text tokenization. 
| + +| Field | Type | Description | +|-------|------|-------------| +| `follow` | `?` | Whether to follow `see_also` cross-references. Accepts `True`/`False` or a number (nonzero = follow). | +| `insensitive` | `?` | Whether to enable case-insensitive matching. | + +## `Translation` + +A translation of a word, definition, or example into another language. + +| Field | Type | Description | +|-------|------|-------------| +| `lang` | `?` | The BCP-47 language code (e.g. `"fra"`, `"deu"`). | +| `value` | `?` | The translated text. | diff --git a/docs/scripts/generate-cli-docs.mjs b/docs/scripts/generate-cli-docs.mjs new file mode 100644 index 00000000..9672b774 --- /dev/null +++ b/docs/scripts/generate-cli-docs.mjs @@ -0,0 +1,454 @@ +/** + * Generates CLI reference documentation by parsing the clap arg definitions + * directly from the Rust source files in cli/src/. + * + * Run: node scripts/generate-cli-docs.mjs + * + * Outputs: src/content/docs/cli/reference.md + */ + +import { readFileSync, writeFileSync, mkdirSync, readdirSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const cliSrc = join(__dirname, "../../cli/src"); +const outPath = join(__dirname, "../src/content/docs/cli/reference.md"); + +// --------------------------------------------------------------------------- +// Read all Rust source files +// --------------------------------------------------------------------------- + +function readRustFile(relPath) { + return readFileSync(join(cliSrc, relPath), "utf-8"); +} + +// --------------------------------------------------------------------------- +// Parse the Commands enum from cli.rs to get command descriptions +// --------------------------------------------------------------------------- + +function parseCommandsEnum(source) { + const commands = {}; + // Match: /// doc comment followed by variant name + const re = 
/\/\/\/\s*(.*)\n\s*(?:#\[.*\]\n\s*)*(\w+)\((\w+)\)/g; + let m; + while ((m = re.exec(source)) !== null) { + const doc = m[1].trim(); + const variant = m[2]; + commands[variant] = doc; + } + return commands; +} + +// --------------------------------------------------------------------------- +// Parse #[arg(...)] fields from an Args struct +// --------------------------------------------------------------------------- + +function parseArgsStruct(source) { + const fields = []; + + // Find the struct body (everything between the first { and last }) + const structMatch = source.match( + /pub\s+struct\s+\w+Args\s*\{([\s\S]*?)\n\}/ + ); + if (!structMatch) return fields; + + const body = structMatch[1]; + + // Split by field declarations - each field may have preceding attributes and doc comments + // We look for patterns like: + // /// doc comment + // #[arg(...)] + // pub field_name: Type, + // -- or -- + // #[arg(..., help = "...")] + // field_name: Type, + + const fieldRegex = + /((?:\/\/\/[^\n]*\n\s*|#\[(?:arg|pyo3)[^\]]*\]\n\s*)*)\s*(?:pub(?:\((?:super|crate)\))?\s+)?(\w+)\s*:\s*([^,\n]+)/g; + + let fm; + while ((fm = fieldRegex.exec(body)) !== null) { + const attrs = fm[1]; + const name = fm[2]; + const type = fm[3].trim(); + + // Skip command subcommand fields + if (attrs.includes("#[command")) continue; + + // Parse #[arg(...)] attributes + const argAttr = attrs.match(/#\[arg\(([\s\S]*?)\)\]/); + const argContent = argAttr ? 
argAttr[1] : ""; + + // Extract help text + let help = extractQuoted(argContent, "help"); + + // Fall back to /// doc comments + if (!help) { + const docMatch = attrs.match(/\/\/\/\s*(.*)/); + if (docMatch) help = docMatch[1].trim(); + } + + // Extract short flag + let short = null; + const shortMatch = argContent.match( + /short\s*=\s*'([^']+)'/ + ); + if (shortMatch) { + short = `-${shortMatch[1]}`; + } else if (/\bshort\b/.test(argContent) && !/short\s*=/.test(argContent)) { + // bare `short` means use first char of field name + short = `-${name[0]}`; + } + + // Extract long flag + let long = null; + const longMatch = argContent.match( + /long\s*=\s*"([^"]+)"/ + ); + if (longMatch) { + long = `--${longMatch[1]}`; + } else if (/\blong\b/.test(argContent) && !/long\s*=/.test(argContent)) { + // bare `long` means use field name with _ -> - + long = `--${name.replace(/_/g, "-")}`; + } + + // Check if required + const required = + argContent.includes("required = true") || + (type !== "bool" && + !type.startsWith("Option<") && + !type.startsWith("Vec<") && + !short && + !long && + !argContent.includes("default_value")); + + // Check for default value + let defaultVal = null; + const defaultMatch = argContent.match( + /default_value_t\s*=\s*([^,\)]+)/ + ); + if (defaultMatch) { + defaultVal = defaultMatch[1].trim(); + // Clean up Rust-specific patterns + defaultVal = defaultVal + .replace(/crate::DEFAULT_RETRIES/, "3") + .replace(/DEFAULT_INDEX_MEMORY/, "15000000") + .replace(/DumpFormat::XML/, "xml") + .replace(/PrintFormat::Print/, "print") + .replace(/PrintFormat::JSON/, "json"); + } + + // Determine if this is a positional arg or a flag + const isPositional = !short && !long && !argContent.includes("default_value_t") && type !== "bool"; + + // Extract value_enum + const isValueEnum = argContent.includes("value_enum"); + + // Determine the arg type for display + let argType = null; + if (type === "bool" || type === "Option") { + argType = null; // boolean flags 
don't take a value + } else if (isValueEnum) { + argType = `<${name}>`; + } else if (type.includes("PathBuf") || type.includes("String")) { + argType = `<${name}>`; + } else if (type.includes("u32") || type.includes("usize") || type.includes("u16")) { + argType = `<${name}>`; + } else if (type.includes("Vec")) { + argType = `<${name}...>`; + } + + // Extract value_parser range info for help + const rangeMatch = argContent.match(/value_parser.*?range\((\d+)\.\.=(\d+)\)/); + if (rangeMatch) { + const rangeInfo = `(${rangeMatch[1]}–${rangeMatch[2]})`; + if (help && !help.includes(rangeMatch[1])) { + help = `${help} ${rangeInfo}`; + } + } + + fields.push({ + name, + type, + short, + long, + help: help || "", + required, + isPositional, + defaultVal, + argType, + }); + } + + return fields; +} + +function extractQuoted(text, key) { + // Match: key = "value" where value may span multiple lines due to formatting + const re = new RegExp(`${key}\\s*=\\s*"([^"]*)"`, "s"); + const m = re.exec(text); + return m ? 
m[1].trim() : null; +} + +// --------------------------------------------------------------------------- +// Parse the AliasCommands enum +// --------------------------------------------------------------------------- + +function parseAliasCommands(source) { + const commands = {}; + const re = /\/\/\/\s*(.*)\n\s*(?:#\[.*\]\n\s*)*(\w+)\((\w+)\)/g; + let m; + while ((m = re.exec(source)) !== null) { + commands[m[2]] = m[1].trim(); + } + return commands; +} + +// --------------------------------------------------------------------------- +// Parse HTTP serve endpoint structs from serve/ directory +// --------------------------------------------------------------------------- + +function parseServeEndpoints() { + const endpoints = []; + + for (const file of ["lookup.rs", "search.rs", "tokenize.rs"]) { + const source = readRustFile(`serve/${file}`); + + // Extract route path: #[get("/{name}/...")] + const routeMatch = source.match(/#\[get\("([^"]+)"\)\]/); + if (!routeMatch) continue; + const route = routeMatch[1]; + + // Extract request struct fields + const structMatch = source.match( + /pub\s+struct\s+(\w+Request)\s*\{([\s\S]*?)\}/ + ); + if (!structMatch) continue; + + const structName = structMatch[1]; + const body = structMatch[2]; + + const params = []; + const fieldRe = /(\w+)\s*:\s*([^,\n]+)/g; + let fm; + while ((fm = fieldRe.exec(body)) !== null) { + const name = fm[1]; + const type = fm[2].trim().replace(/,$/, ""); + const isOptional = type.startsWith("Option<"); + const innerType = isOptional + ? type.match(/Option<(\w+)>/)?.[1] || type + : type; + params.push({ + name, + type: innerType === "String" ? "string" : innerType === "bool" ? 
"boolean" : "number", + optional: isOptional, + }); + } + + endpoints.push({ route, params }); + } + + return endpoints; +} + +// --------------------------------------------------------------------------- +// Build CLI documentation from parsed source +// --------------------------------------------------------------------------- + +const cliSource = readRustFile("cli.rs"); +const commandDescs = parseCommandsEnum(cliSource); +const aliasSource = readRustFile("alias/alias.rs"); +const aliasDescs = parseAliasCommands(aliasSource); + +// Map command variant names to their source files +const commandFiles = { + Compile: "compile.rs", + Download: "download.rs", + Dump: "dump.rs", + Index: "index.rs", + Info: "info.rs", + Lexicon: "lexicon.rs", + Lookup: "lookup.rs", + Merge: "merge.rs", + New: "new.rs", + Search: "search.rs", + Serve: "serve/mod.rs", + Tokenize: "tokenize.rs", +}; + +const aliasFiles = { + Add: "alias/set.rs", + Set: "alias/set.rs", + Delete: "alias/delete.rs", +}; + +// Parse serve HTTP endpoints +const serveEndpoints = parseServeEndpoints(); + +// --------------------------------------------------------------------------- +// Render Markdown +// --------------------------------------------------------------------------- + +let md = `--- +title: CLI Reference +description: Complete reference for the ODict command-line interface. +--- + +{/* This file is auto-generated by scripts/generate-cli-docs.mjs — do not edit manually. */} + +\`\`\` +odict [OPTIONS] +\`\`\` + +The ODict CLI is the primary tool for creating, compiling, and querying ODict dictionaries. 

## Global options

| Option | Description |
|--------|-------------|
| \`-q, --quiet\` | Silence any non-important output |
| \`-h, --help\` | Print help |
| \`-V, --version\` | Print version |

---

## Commands

`;

// Render each main command: heading, description, usage synopsis, then
// tables for positional arguments and options parsed from the Rust source.
for (const [variant, file] of Object.entries(commandFiles)) {
  const source = readRustFile(file);
  const fields = parseArgsStruct(source);
  const desc = commandDescs[variant] || variant;
  const cmdName = variant.toLowerCase();

  md += `### \`odict ${cmdName}\`\n\n`;
  md += `${desc}.\n\n`;

  // Build usage string
  const positionals = fields.filter((f) => f.isPositional);
  const options = fields.filter((f) => !f.isPositional);
  let usage = `odict ${cmdName}`;
  for (const p of positionals) {
    // Vec-typed positionals accept multiple values (rendered with "...").
    if (p.type.includes("Vec<")) {
      usage += p.required ? ` <${p.name}...>` : ` [${p.name}...]`;
    } else {
      usage += p.required ? ` <${p.name}>` : ` [${p.name}]`;
    }
  }
  for (const o of options) {
    if (o.name === "retries") continue; // skip common retries flag in usage
    // Prefer the short flag in the synopsis; fall back to the long form.
    const flag = o.short || o.long;
    if (flag) {
      if (o.argType) {
        usage += ` [${flag} ${o.argType}]`;
      } else {
        usage += ` [${flag}]`;
      }
    }
  }
  md += `\`\`\`\n${usage}\n\`\`\`\n\n`;

  // Positional arguments table
  if (positionals.length > 0) {
    md += `#### Arguments\n\n`;
    md += `| Argument | Required | Description |\n`;
    md += `|----------|----------|-------------|\n`;
    for (const p of positionals) {
      md += `| \`${p.name}\` | ${p.required ? "Yes" : "No"} | ${p.help} |\n`;
    }
    md += `\n`;
  }

  // Options table
  if (options.length > 0) {
    md += `#### Options\n\n`;
    md += `| Flag | Description |\n`;
    md += `|------|-------------|\n`;
    for (const o of options) {
      const flags = [o.short, o.long].filter(Boolean).join(", ");
      // NOTE: `desc` here shadows the outer command description on purpose —
      // this one is the per-flag help text.
      let desc = o.help;
      // Append the default value unless the help text already mentions one.
      if (o.defaultVal && !desc.includes("default")) {
        desc += ` (default: \`${o.defaultVal}\`)`;
      }
      md += `| \`${flags}\` | ${desc} |\n`;
    }
    md += `\n`;
  }

  // HTTP endpoints for serve command
  if (cmdName === "serve" && serveEndpoints.length > 0) {
    md += `#### HTTP endpoints\n\n`;
    md += `When running \`odict serve\`, the following REST endpoints become available. All return JSON.\n\n`;

    for (const ep of serveEndpoints) {
      md += `##### \`GET ${ep.route}\`\n\n`;
      md += `| Parameter | Type | Required | Description |\n`;
      md += `|-----------|------|----------|-------------|\n`;
      for (const p of ep.params) {
        // Description column is left empty: query-param doc comments are not
        // extracted from the Rust source (yet).
        md += `| \`${p.name}\` | ${p.type} | ${p.optional ? "No" : "Yes"} | |\n`;
      }
      md += `\n`;
    }
  }

  md += `---\n\n`;
}

// Render alias subcommands (nested one level under `odict alias`)
md += `### \`odict alias\`\n\n`;
md += `Manage dictionary aliases.\n\n`;

for (const [variant, file] of Object.entries(aliasFiles)) {
  const source = readRustFile(file);
  const fields = parseArgsStruct(source);
  const desc = aliasDescs[variant] || variant;
  const cmdName = variant.toLowerCase();

  md += `#### \`odict alias ${cmdName}\`\n\n`;
  md += `${desc}.\n\n`;

  // Build usage
  const positionals = fields.filter((f) => f.isPositional);
  const options = fields.filter((f) => !f.isPositional);
  let usage = `odict alias ${cmdName}`;
  for (const p of positionals) {
    usage += p.required ?
 ` <${p.name}>` : ` [${p.name}]`;
  }
  md += `\`\`\`\n${usage}\n\`\`\`\n\n`;

  // Positional arguments table for the alias subcommand
  if (positionals.length > 0) {
    md += `| Argument | Required | Description |\n`;
    md += `|----------|----------|-------------|\n`;
    for (const p of positionals) {
      md += `| \`${p.name}\` | ${p.required ? "Yes" : "No"} | ${p.help} |\n`;
    }
    md += `\n`;
  }

  // Options table for the alias subcommand
  if (options.length > 0) {
    md += `| Flag | Description |\n`;
    md += `|------|-------------|\n`;
    for (const o of options) {
      const flags = [o.short, o.long].filter(Boolean).join(", ");
      let desc = o.help;
      // Append the default value unless the help text already mentions one.
      if (o.defaultVal && !desc.includes("default")) {
        desc += ` (default: \`${o.defaultVal}\`)`;
      }
      md += `| \`${flags}\` | ${desc} |\n`;
    }
    md += `\n`;
  }
}

md += `---\n`;

// ---------------------------------------------------------------------------
// Write output
// ---------------------------------------------------------------------------

mkdirSync(dirname(outPath), { recursive: true });
writeFileSync(outPath, md, "utf-8");
console.log(`✅ Generated CLI reference → ${outPath}`);
diff --git a/docs/scripts/generate-schema-docs.mjs b/docs/scripts/generate-schema-docs.mjs
new file mode 100644
index 00000000..2bcce7c3
--- /dev/null
+++ b/docs/scripts/generate-schema-docs.mjs
@@ -0,0 +1,423 @@
/**
 * Parses odict.xsd and lib/src/schema/pos.rs to generate
 * a Markdown reference page for the ODict XML schema.
 *
 * Run: node scripts/generate-schema-docs.mjs
 *
 * Outputs: src/content/docs/schema/reference.md
 */

import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";

const __dirname = dirname(fileURLToPath(import.meta.url));
const xsdPath = join(__dirname, "../../odict.xsd");
const posPath = join(__dirname, "../../lib/src/schema/pos.rs");
const outPath = join(__dirname, "../src/content/docs/schema/reference.md");

const xsd = readFileSync(xsdPath, "utf-8");
const posSource = readFileSync(posPath, "utf-8");

// ---------------------------------------------------------------------------
// Tokenize XSD into open / close / self-closing tags
// ---------------------------------------------------------------------------

/**
 * Flattens the XSD into a token stream of
 * { type: "open" | "close" | "selfclose", tag, attrs }.
 *
 * The regex only recognizes tags whose attributes use double-quoted values.
 * NOTE(review): it does not special-case XML comments, CDATA, or processing
 * instructions — tag-like text inside those would still be tokenized; fine
 * as long as odict.xsd contains none. Verify if the XSD ever gains comments
 * with markup in them.
 */
function tokenize(xml) {
  const tokens = [];
  const re = /<(\/?)(\w[\w:.]*)((?:\s+[\w:]+\s*=\s*"[^"]*")*)\s*(\/?)>/g;
  let m;
  while ((m = re.exec(xml)) !== null) {
    const isClose = m[1] === "/";
    const tag = m[2];
    const attrStr = m[3];
    const isSelfClose = m[4] === "/";

    // Parse the attribute string into a plain name → value map.
    const attrs = {};
    const attrRe = /([\w:]+)\s*=\s*"([^"]*)"/g;
    let am;
    while ((am = attrRe.exec(attrStr)) !== null) {
      attrs[am[1]] = am[2];
    }

    if (isClose) {
      tokens.push({ type: "close", tag, attrs });
    } else if (isSelfClose) {
      tokens.push({ type: "selfclose", tag, attrs });
    } else {
      tokens.push({ type: "open", tag, attrs });
    }
  }
  return tokens;
}

// Module-level token stream every later phase reads from.
const tokens = tokenize(xsd);

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Find the index of the matching close tag for an open tag at `openIndex`.
*/ +function findClose(openIndex) { + const openTag = tokens[openIndex].tag; + let depth = 1; + for (let i = openIndex + 1; i < tokens.length; i++) { + if (tokens[i].tag === openTag) { + if (tokens[i].type === "open") depth++; + else if (tokens[i].type === "close") { + depth--; + if (depth === 0) return i; + } + } + } + return -1; +} + +/** + * Parse a complexType range (ctOpen..ctClose) for its direct attributes + * and direct child elements. "Direct" means not inside a nested + * xs:complexType — we track xs:complexType nesting depth and only + * collect items at depth 0. + */ +function parseComplexTypeRange(ctOpen, ctClose) { + const attributes = []; + const children = []; + let depth = 0; + + for (let i = ctOpen + 1; i < ctClose; i++) { + const t = tokens[i]; + + // Track nesting of inner xs:complexType blocks + if (t.tag === "xs:complexType") { + if (t.type === "open") depth++; + else if (t.type === "close") depth--; + continue; + } + + if (depth > 0) continue; // inside a nested complexType — skip + + // Collect attributes at depth 0 + if ( + t.tag === "xs:attribute" && + t.attrs.name && + (t.type === "selfclose" || t.type === "open") + ) { + attributes.push({ + name: t.attrs.name, + type: (t.attrs.type || "xs:string").replace("xs:", ""), + required: t.attrs.use === "required", + }); + } + + // Collect child elements at depth 0 + if ( + t.tag === "xs:element" && + t.attrs.name && + (t.type === "selfclose" || t.type === "open") + ) { + children.push({ + name: t.attrs.name, + type: t.attrs.type || undefined, + minOccurs: t.attrs.minOccurs ?? "1", + maxOccurs: t.attrs.maxOccurs ?? 
"1", + }); + + // Skip past this element's entire subtree + if (t.type === "open") { + i = findClose(i); + } + } + } + + return { attributes, children }; +} + +// --------------------------------------------------------------------------- +// Phase 1: Parse named complexTypes (defined at XSD top-level) +// --------------------------------------------------------------------------- + +const namedTypes = new Map(); + +for (let i = 0; i < tokens.length; i++) { + const t = tokens[i]; + if (t.tag === "xs:complexType" && t.type === "open" && t.attrs.name) { + const ctClose = findClose(i); + const { attributes, children } = parseComplexTypeRange(i, ctClose); + namedTypes.set(t.attrs.name, { attributes, children }); + i = ctClose; + } +} + +// --------------------------------------------------------------------------- +// Phase 2: Recursively walk the root element to build element map +// --------------------------------------------------------------------------- + +const elements = new Map(); + +/** Resolve a named complexType into an element descriptor and register children. */ +function resolveNamedType(typeName) { + const type = namedTypes.get(typeName); + if (!type) return { attributes: [], children: [] }; + + const children = type.children.map((c) => ({ + name: c.name, + minOccurs: c.minOccurs, + maxOccurs: c.maxOccurs, + })); + + // Recursively register child elements that reference named types + for (const child of type.children) { + if (!elements.has(child.name) && child.type && namedTypes.has(child.type)) { + elements.set(child.name, resolveNamedType(child.type)); + } else if (!elements.has(child.name)) { + elements.set(child.name, { attributes: [], children: [] }); + } + } + + return { attributes: [...type.attributes], children }; +} + +/** Process an xs:element token at `index` and register it in the elements map. 
*/ +function processElement(index) { + const t = tokens[index]; + const name = t.attrs.name; + const type = t.attrs.type; + + if (elements.has(name)) return; + + // Self-closing element or element with a named type + if (t.type === "selfclose") { + if (type && namedTypes.has(type)) { + elements.set(name, resolveNamedType(type)); + } else { + elements.set(name, { attributes: [], children: [] }); + } + return; + } + + // Open element with a named type (no inline complexType) + if (type && namedTypes.has(type)) { + elements.set(name, resolveNamedType(type)); + return; + } + + const elClose = findClose(index); + + // Find the inline xs:complexType within this element + for (let i = index + 1; i < elClose; i++) { + if (tokens[i].tag === "xs:complexType" && tokens[i].type === "open") { + const ctClose = findClose(i); + const { attributes, children } = parseComplexTypeRange(i, ctClose); + + elements.set(name, { + attributes, + children: children.map((c) => ({ + name: c.name, + minOccurs: c.minOccurs, + maxOccurs: c.maxOccurs, + })), + }); + + // Recursively process child elements found at depth 0 + let depth = 0; + for (let j = i + 1; j < ctClose; j++) { + if (tokens[j].tag === "xs:complexType") { + if (tokens[j].type === "open") depth++; + else if (tokens[j].type === "close") depth--; + continue; + } + if (depth > 0) continue; + + if ( + tokens[j].tag === "xs:element" && + tokens[j].attrs.name && + (tokens[j].type === "selfclose" || tokens[j].type === "open") + ) { + processElement(j); + if (tokens[j].type === "open") { + j = findClose(j); + } + } + } + + break; + } + } +} + +// Find the root and process it +for (let i = 0; i < tokens.length; i++) { + const t = tokens[i]; + if ( + t.tag === "xs:element" && + t.attrs.name === "dictionary" && + (t.type === "open" || t.type === "selfclose") + ) { + processElement(i); + break; + } +} + +// --------------------------------------------------------------------------- +// Parse POS codes from lib/src/schema/pos.rs +// 
--------------------------------------------------------------------------- + +function parsePosEnum(source) { + const entries = []; + const re = + /#\[strum\(to_string\s*=\s*"([^"]+)"\)\]\s*(?:#\[.*\]\s*)*(\w+)/g; + let m; + while ((m = re.exec(source)) !== null) { + const label = m[1]; + const variant = m[2]; + if (variant === "Other") continue; + entries.push({ variant, label }); + } + return entries; +} + +const allPos = parsePosEnum(posSource); + +const japaneseVariantPrefixes = [ + "AdjPn", "AdjKari", "AdjKu", "AdjNari", "AdjNa", "AdjShiku", + "AdjT", "AdjIx", "NAdv", "AdvTo", "AdjNo", "NPref", "NSuf", + "NT", "AdjF", "V5", "V1", "Vz", "Vk", "V2", "Vn", "Vr", + "VsC", "Vs", "VUnspec", "V4", +]; + +function isJapanese(variant) { + return japaneseVariantPrefixes.some( + (p) => variant === p || variant.startsWith(p) + ); +} + +const universalPos = allPos.filter((p) => !isJapanese(p.variant)); +const japanesePos = allPos.filter((p) => isJapanese(p.variant)); + +function variantToCode(variant) { + return variant + .replace(/([a-z])([A-Z])/g, "$1_$2") + .replace(/([A-Z]+)([A-Z][a-z])/g, "$1_$2") + .toLowerCase(); +} + +// --------------------------------------------------------------------------- +// Build element hierarchy tree (with deduplication via seen set) +// --------------------------------------------------------------------------- + +function buildTree(name, prefix = "", isLast = true, seen = new Set(), isRoot = true) { + const el = elements.get(name); + const connector = isRoot ? "" : isLast ? "└── " : "├── "; + + if (seen.has(name)) { + return `${prefix}${connector}${name} …\n`; + } + + let result = `${prefix}${connector}${name}\n`; + seen.add(name); + + if (!el || el.children.length === 0) return result; + + const childPrefix = isRoot ? "" : prefix + (isLast ? 
" " : "│ "); + + for (let i = 0; i < el.children.length; i++) { + const child = el.children[i]; + const childIsLast = i === el.children.length - 1; + result += buildTree(child.name, childPrefix, childIsLast, seen, false); + } + + return result; +} + +// --------------------------------------------------------------------------- +// Render Markdown +// --------------------------------------------------------------------------- + +const elementOrder = [ + "dictionary", "entry", "ety", "sense", "group", + "definition", "note", "example", "pronunciation", "url", +]; + +let md = `--- +title: XML Schema Reference +description: Complete reference for the ODict XML (ODXML) schema. +--- + +{/* This file is auto-generated by scripts/generate-schema-docs.mjs — do not edit manually. */} + +This page is automatically generated from [\`odict.xsd\`](https://github.com/TheOpenDictionary/odict/blob/main/odict.xsd) and [\`pos.rs\`](https://github.com/TheOpenDictionary/odict/blob/main/lib/src/schema/pos.rs). + +## Element hierarchy + +\`\`\` +${buildTree("dictionary").trimEnd()} +\`\`\` + +--- + +## Elements + +`; + +for (const name of elementOrder) { + const el = elements.get(name); + if (!el) continue; + + md += `### \`<${name}>\`\n\n`; + + if (el.attributes.length > 0) { + md += `#### Attributes\n\n`; + md += `| Attribute | Type | Required |\n`; + md += `|-----------|------|----------|\n`; + for (const a of el.attributes) { + md += `| \`${a.name}\` | \`${a.type}\` | ${a.required ? 
"Yes" : "No"} |\n`; + } + md += `\n`; + } + + if (el.children.length > 0) { + md += `#### Child elements\n\n`; + md += `| Element | Min | Max |\n`; + md += `|---------|-----|-----|\n`; + for (const c of el.children) { + md += `| [\`<${c.name}>\`](#${c.name}) | ${c.minOccurs} | ${c.maxOccurs} |\n`; + } + md += `\n`; + } + + md += `---\n\n`; +} + +// --------------------------------------------------------------------------- +// Parts of Speech +// --------------------------------------------------------------------------- + +md += `## Parts of speech\n\n`; +md += `The \`pos\` attribute on \`\` accepts the following values. You can also pass any custom string, which will be treated as a custom part of speech.\n\n`; + +md += `### Universal\n\n`; +md += `| Code | Label |\n`; +md += `|------|-------|\n`; +for (const p of universalPos) { + md += `| \`${variantToCode(p.variant)}\` | ${p.label} |\n`; +} +md += `\n`; + +md += `### Japanese-specific\n\n`; +md += `| Code | Label |\n`; +md += `|------|-------|\n`; +for (const p of japanesePos) { + md += `| \`${variantToCode(p.variant)}\` | ${p.label} |\n`; +} +md += `\n`; + +// --------------------------------------------------------------------------- +// Write output +// --------------------------------------------------------------------------- + +mkdirSync(dirname(outPath), { recursive: true }); +writeFileSync(outPath, md, "utf-8"); + +console.log( + `Generated schema reference -> ${outPath} (${elements.size} elements, ${allPos.length} POS codes)` +); diff --git a/docs/src/content.config.ts b/docs/src/content.config.ts new file mode 100644 index 00000000..a4eec59b --- /dev/null +++ b/docs/src/content.config.ts @@ -0,0 +1,6 @@ +import { defineCollection } from "astro:content"; +import { docsSchema } from "@astrojs/starlight/schema"; + +export const collections = { + docs: defineCollection({ schema: docsSchema() }), +}; diff --git a/docs/src/content/docs/api/javascript.md b/docs/src/content/docs/api/javascript.md new file 
mode 100644 index 00000000..2ca8c1a7 --- /dev/null +++ b/docs/src/content/docs/api/javascript.md @@ -0,0 +1,386 @@ +--- +title: JavaScript API +description: Using ODict from JavaScript/TypeScript via the @odict/node package. +--- + +The JavaScript bindings are distributed as `@odict/node` on npm. They are native extensions built with [NAPI-RS](https://napi.rs/) and also support the browser via WASI. + +## Installation + +```bash +npm install @odict/node +``` + +Requires Node.js 12+. Native binaries are included for all major platforms (macOS, Linux, Windows, ARM64, WASI). + +## Quick example + +```typescript +import { readFile } from "node:fs/promises"; +import { compile, OpenDictionary } from "@odict/node"; + +// Compile XML to a buffer +const xml = await readFile("my-dictionary.xml", "utf-8"); +const data = compile(xml); +const dictionary = new OpenDictionary(data); + +const results = dictionary.lookup("hello"); +console.log(results[0].entry.term); // "hello" +``` + +--- + +## Functions + +### `compile(xml: string): Buffer` + +Compiles an ODXML string into binary `.odict` data. Returns a `Buffer` that can be passed to `new OpenDictionary()`. + +```typescript +import { compile } from "@odict/node"; + +const data = compile(` + + + + + +`); +``` + +--- + +## `OpenDictionary` + +The main class for working with compiled dictionaries. + +### Constructors + +#### `new OpenDictionary(data: Buffer | string)` + +Creates a dictionary from compiled binary data (as returned by `compile()`) or directly from an XML string. + +```typescript +import { compile, OpenDictionary } from "@odict/node"; + +// From compiled buffer +const data = compile(xmlString); +const dictionary = new OpenDictionary(data); + +// Directly from XML string +const dictionary = new OpenDictionary(xmlString); +``` + +#### `OpenDictionary.load(dictionary: string, options?: LoadOptions): Promise` + +Loads a dictionary from a file path or remote identifier. Returns a `Promise`. 
+ +- If `dictionary` is a path to a `.odict` file, it loads from disk. +- If it matches the format `org/lang` (e.g. `wiktionary/eng`), it downloads from the remote registry. + +```typescript +import { OpenDictionary } from "@odict/node"; + +// Load from file +const dictionary = await OpenDictionary.load("./my-dictionary.odict"); + +// Load from remote registry +const dictionary = await OpenDictionary.load("wiktionary/eng"); + +// Load with options +const dictionary = await OpenDictionary.load("wiktionary/eng", { + configDir: "./config", + remote: { caching: true, retries: 3 }, +}); +``` + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `minRank` | `number \| null` | The minimum rank value across all entries, or `null` if no entries have ranks | +| `maxRank` | `number \| null` | The maximum rank value across all entries, or `null` if no entries have ranks | + +### Methods + +#### `save(path: string, options?: SaveOptions): void` + +Saves the dictionary to disk as a `.odict` file. + +```typescript +dictionary.save("output.odict"); +dictionary.save("output.odict", { + compress: { quality: 11, windowSize: 22 }, +}); +``` + +#### `lookup(query: string | string[], options?: LookupOptions): LookupResult[]` + +Looks up one or more terms by exact match. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | `string \| string[]` | — | Term(s) to look up | +| `options.split` | `number` | — | Minimum word length for compound splitting | +| `options.follow` | `boolean` | — | Follow `see` cross-references until an entry with etymologies is found | +| `options.insensitive` | `boolean` | — | Enable case-insensitive matching | + +```typescript +// Simple lookup +const results = dictionary.lookup("cat"); + +// Multiple terms +const results = dictionary.lookup(["cat", "dog"]); + +// Follow cross-references, case-insensitive +const results = dictionary.lookup("RaN", { + follow: true, + insensitive: true, +}); +// results[0].entry.term === "run" +// results[0].directedFrom?.term === "ran" + +// Compound word splitting +const results = dictionary.lookup("catdog", { split: 3 }); +``` + +#### `lexicon(): string[]` + +Returns all terms defined in the dictionary, sorted alphabetically. + +```typescript +const words = dictionary.lexicon(); +// ["cat", "dog", "run", ...] +``` + +#### `index(options?: IndexOptions): void` + +Creates a full-text search index for the dictionary. + +```typescript +dictionary.index(); +dictionary.index({ overwrite: true, memory: 50_000_000 }); +``` + +#### `search(query: string, options?: SearchOptions): Entry[]` + +Runs a full-text search. Requires an index (call `index()` first). + +```typescript +dictionary.index(); + +const results = dictionary.search("domesticated mammal"); +const results = dictionary.search("greeting", { limit: 5 }); +``` + +#### `tokenize(text: string, options?: TokenizeOptions): Token[]` + +Tokenizes text and matches each token against the dictionary. Supports Chinese, Japanese, Korean, Thai, Khmer, German, Swedish, and Latin-script languages. 
 
```typescript
const tokens = dictionary.tokenize("the cat ran");
for (const token of tokens) {
  console.log(token.lemma, token.entries);
}

// With options
const tokens = dictionary.tokenize("DOG cat", {
  insensitive: true,
  follow: true,
});
```

---

## Types

### `LookupResult`

```typescript
interface LookupResult {
  entry: Entry;
  directedFrom?: Entry;
}
```

### `Entry`

```typescript
interface Entry {
  term: string;
  rank?: number;
  seeAlso?: string;
  etymologies: Etymology[];
  media: MediaUrl[];
}
```

### `Etymology`

```typescript
interface Etymology {
  id?: string;
  pronunciations: Pronunciation[];
  description?: string;
  senses: Sense[];
}
```

### `Sense`

```typescript
interface Sense {
  pos: EnumWrapper;
  lemma?: string;
  definitions: Array<Definition | Group>;
  tags: string[];
  translations: Translation[];
  forms: Form[];
}
```

### `Definition`

```typescript
interface Definition {
  id?: string;
  value: string;
  examples: Example[];
  notes: Note[];
}
```

### `Group`

```typescript
interface Group {
  id?: string;
  description: string;
  definitions: Definition[];
}
```

### `Example`

```typescript
interface Example {
  value: string;
  translations: Translation[];
  pronunciations: Pronunciation[];
}
```

### `Note`

```typescript
interface Note {
  id?: string;
  value: string;
  examples: Example[];
}
```

### `Pronunciation`

```typescript
interface Pronunciation {
  kind?: EnumWrapper;
  value: string;
  media: MediaUrl[];
}
```

### `MediaUrl`

```typescript
interface MediaUrl {
  src: string;
  mimeType?: string;
  description?: string;
}
```

### `Token`

```typescript
interface Token {
  lemma: string;
  language?: string;
  entries: LookupResult[];
  kind: string;
  script: string;
  start: number;
  end: number;
}
```

### `EnumWrapper`

```typescript
interface EnumWrapper {
  name: string;
  variant: string;
  value: string;
}
```

+### Options + +```typescript +interface LoadOptions { + configDir?: string; + remote?: RemoteLoadOptions; +} + +interface RemoteLoadOptions { + outDir?: string; + caching?: boolean; + retries?: number; +} + +interface SaveOptions { + compress?: CompressOptions; +} + +interface CompressOptions { + quality?: number; + windowSize?: number; +} + +interface LookupOptions { + split?: number; + follow?: boolean; + insensitive?: boolean; +} + +interface IndexOptions { + directory?: string; + memory?: number; + overwrite?: boolean; +} + +interface SearchOptions { + directory?: string; + threshold?: number; + autoindex?: boolean; + limit?: number; +} + +interface TokenizeOptions { + follow?: boolean; + allowList?: string[]; + insensitive?: boolean; +} +``` + +## Browser support + +The `@odict/node` package also supports browser environments via WASI. Import from the browser entry point: + +```typescript +import { compile, OpenDictionary } from "@odict/node/browser"; +``` + +:::note +Browser support runs ODict compiled to WebAssembly via WASI. The `load()` method (which accesses the filesystem and network) is not available in the browser — use `new OpenDictionary(data)` with pre-compiled data instead. +::: diff --git a/docs/src/content/docs/api/python.md b/docs/src/content/docs/api/python.md new file mode 100644 index 00000000..64aae17e --- /dev/null +++ b/docs/src/content/docs/api/python.md @@ -0,0 +1,281 @@ +--- +title: Python API +description: Using ODict from Python via the theopendictionary package. +--- + +The Python bindings are distributed as the `theopendictionary` package on PyPI. They are native extensions built with [PyO3](https://pyo3.rs/). + +## Installation + +```bash +pip install theopendictionary +``` + +Requires Python 3.8.1+. 
+ +## Quick example + +```python +from theopendictionary import OpenDictionary, compile + +# Compile XML to bytes +xml = """ + + + + + + + + + + + +""" + +compiled_bytes = compile(xml) +dictionary = OpenDictionary(compiled_bytes) + +results = dictionary.lookup("hello") +print(results[0].entry.term) # "hello" +print(results[0].entry.etymologies) # [Etymology(...)] +``` + +--- + +## Functions + +### `compile(xml: str) -> bytes` + +Compiles an ODXML string into binary `.odict` data (as a `bytes` object). This data can be passed to `OpenDictionary()` or saved to disk. + +```python +from theopendictionary import compile + +data = compile("") +``` + +--- + +## `OpenDictionary` + +The main class for working with compiled dictionaries. + +### Constructors + +#### `OpenDictionary(data: bytes | str)` + +Creates a dictionary from compiled binary data (as returned by `compile()`) or directly from an XML string. + +```python +from theopendictionary import OpenDictionary, compile + +# From compiled bytes +data = compile(xml_string) +dictionary = OpenDictionary(data) + +# Directly from XML string +dictionary = OpenDictionary(xml_string) +``` + +#### `await OpenDictionary.load(dictionary: str, options: LoadOptions | None = None) -> OpenDictionary` + +Loads a dictionary from a file path, alias, or remote identifier. This is an **async** method. + +- If `dictionary` is a path to a `.odict` file, it loads from disk. +- If it matches the format `org/lang` (e.g. `wiktionary/eng`), it downloads from the remote registry. 
+ +```python +import asyncio +from theopendictionary import OpenDictionary, LoadOptions, RemoteLoadOptions + +async def main(): + # Load from file + dictionary = await OpenDictionary.load("./my-dictionary.odict") + + # Load from remote registry + dictionary = await OpenDictionary.load("wiktionary/eng") + + # Load with options + opts = LoadOptions( + config_dir="./config", + remote=RemoteLoadOptions(caching=True) + ) + dictionary = await OpenDictionary.load("wiktionary/eng", opts) + +asyncio.run(main()) +``` + +### Properties + +| Property | Type | Description | +|----------|------|-------------| +| `min_rank` | `int \| None` | The minimum rank value across all entries, or `None` if no entries have ranks | +| `max_rank` | `int \| None` | The maximum rank value across all entries, or `None` if no entries have ranks | + +### Methods + +#### `save(path: str, quality: int | None = None, window_size: int | None = None) -> None` + +Saves the dictionary to disk as a `.odict` file. Optionally configure Brotli compression. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `path` | `str` | — | Output file path | +| `quality` | `int \| None` | `None` | Brotli compression level (0–11) | +| `window_size` | `int \| None` | `None` | Brotli window size (0–22) | + +```python +dictionary.save("output.odict") +dictionary.save("output.odict", quality=11, window_size=22) +``` + +#### `lookup(query, split=None, follow=None, insensitive=None) -> list[LookupResult]` + +Looks up one or more terms by exact match. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | `str \| list[str]` | — | Term(s) to look up | +| `split` | `int \| None` | `None` | Minimum word length for compound splitting | +| `follow` | `bool \| None` | `None` | Follow `see` cross-references until an entry with etymologies is found | +| `insensitive` | `bool \| None` | `None` | Enable case-insensitive matching | + +```python +# Simple lookup +results = dictionary.lookup("cat") + +# Multiple terms +results = dictionary.lookup(["cat", "dog"]) + +# Follow cross-references, case-insensitive +results = dictionary.lookup("RaN", follow=True, insensitive=True) +# results[0].entry.term == "run" +# results[0].directed_from.term == "ran" + +# Compound word splitting +results = dictionary.lookup("catdog", split=3) +``` + +#### `lexicon() -> list[str]` + +Returns all terms defined in the dictionary, sorted alphabetically. + +```python +words = dictionary.lexicon() +# ["cat", "dog", "run", ...] +``` + +#### `index(options=None) -> None` + +Creates a full-text search index for the dictionary. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `options` | `IndexOptions \| None` | `None` | Indexing configuration | + +```python +from theopendictionary import IndexOptions + +dictionary.index() +dictionary.index(IndexOptions(overwrite=True, memory=50_000_000)) +``` + +#### `search(query: str, options=None) -> list[Entry]` + +Runs a full-text search across the dictionary. Requires an index (call `index()` first). 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `query` | `str` | — | Search query | +| `options` | `SearchOptions \| None` | `None` | Search configuration | + +```python +from theopendictionary import SearchOptions + +dictionary.index() +results = dictionary.search("domesticated mammal") +results = dictionary.search("greeting", SearchOptions(limit=5)) +``` + +#### `tokenize(text: str, follow=None, insensitive=None) -> list[Token]` + +Tokenizes text using NLP-based segmentation and matches each token against the dictionary. Supports Chinese, Japanese, Korean, Thai, Khmer, German, Swedish, and Latin-script languages. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `text` | `str` | — | Text to tokenize | +| `follow` | `bool \| int \| None` | `None` | Follow `see` cross-references. Accepts `True`/`False` or a number (nonzero = follow) | +| `insensitive` | `bool \| None` | `None` | Case-insensitive matching | + +```python +tokens = dictionary.tokenize("the cat ran") +for token in tokens: + print(token.lemma, token.entries) +``` + +--- + +## Types + +### `LookupResult` + +| Property | Type | Description | +|----------|------|-------------| +| `entry` | `Entry` | The matched entry | +| `directed_from` | `Entry \| None` | The original entry if a `see` redirect was followed | + +### `Entry` + +| Property | Type | Description | +|----------|------|-------------| +| `term` | `str` | The headword | +| `rank` | `int \| None` | Optional frequency rank | +| `see_also` | `str \| None` | Cross-reference target term | +| `etymologies` | `list[Etymology]` | List of etymologies | +| `media` | `list[MediaURL]` | Media URLs | + +### `Token` + +| Property | Type | Description | +|----------|------|-------------| +| `lemma` | `str` | The original token text | +| `language` | `str \| None` | Detected language code | +| `script` | `str` | Detected script name | +| `kind` | `str` | Token kind | 
+| `start` | `int` | Start offset in the original text | +| `end` | `int` | End offset in the original text | +| `entries` | `list[LookupResult]` | Matched dictionary entries | + +### `IndexOptions` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `directory` | `str \| None` | `None` | Custom directory for the index | +| `memory` | `int \| None` | `None` | Memory arena per thread in bytes (must be >15MB) | +| `overwrite` | `bool \| None` | `None` | Overwrite existing index | + +### `SearchOptions` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `directory` | `str \| None` | `None` | Custom index directory | +| `threshold` | `int \| None` | `None` | Relevance threshold | +| `autoindex` | `bool \| None` | `None` | Auto-create index if missing | +| `limit` | `int \| None` | `None` | Maximum results | + +### `Pronunciation` + +| Property | Type | Description | +|----------|------|-------------| +| `kind` | `EnumWrapper \| None` | The pronunciation system (e.g. IPA, Pinyin) | +| `value` | `str` | The pronunciation notation | +| `media` | `list[MediaURL]` | Audio URLs | + +### `MediaURL` + +| Property | Type | Description | +|----------|------|-------------| +| `src` | `str` | URL or path to the media file | +| `mime_type` | `str \| None` | MIME type (e.g. `audio/mpeg`) | +| `description` | `str \| None` | Description of the media | diff --git a/docs/src/content/docs/api/rust.md b/docs/src/content/docs/api/rust.md new file mode 100644 index 00000000..02d6ab25 --- /dev/null +++ b/docs/src/content/docs/api/rust.md @@ -0,0 +1,98 @@ +--- +title: Rust API +description: Using the ODict Rust crate. +--- + +The `odict` crate is the core library that powers the CLI and all language bindings. It is published on [crates.io](https://crates.io/crates/odict). 
+ +## Installation + +Add to your `Cargo.toml`: + +```toml +[dependencies] +odict = "2" +``` + +## Documentation + +Full API documentation is available on **docs.rs**: + +**[docs.rs/odict](https://docs.rs/odict)** + +## Feature flags + +The `odict` crate uses feature flags to control which capabilities are compiled in. The `default` feature includes `sql` and `config`. + +| Feature | Description | +|---------|-------------| +| `default` | Enables `sql` and `config` | +| `sql` | SQL dump support (SQLite, PostgreSQL, MySQL) via sea-query | +| `config` | Access to platform-specific config directories | +| `alias` | Dictionary alias management (implies `config`) | +| `search` | Full-text search via Tantivy (implies `config`) | +| `markdown` | Markdown rendering support via pulldown-cmark | +| `html` | HTML output support (implies `markdown`) | +| `http` | Remote dictionary downloading (implies `config`) | +| `tokenize` | Full multi-language tokenization (enables all language tokenizers) | +| `tokenize-latin` | Latin-script tokenization | +| `tokenize-chinese` | Chinese segmentation | +| `tokenize-japanese` | Japanese segmentation (UniDic) | +| `tokenize-korean` | Korean segmentation | +| `tokenize-thai` | Thai segmentation | +| `tokenize-khmer` | Khmer segmentation | +| `tokenize-swedish` | Swedish recomposition | +| `tokenize-german` | German segmentation | + +## Quick example + +```rust +use odict::{OpenDictionary, ToDictionary}; + +fn main() -> odict::Result<()> { + // Compile from XML + let xml = r#" + + + + + + + + + + "#; + + // Compile and write to disk + let dict = xml.to_dictionary()?.build()?; + dict.to_disk("example.odict")?; + + // Read from disk + let file = OpenDictionary::from_path("example.odict")?; + let contents = file.contents()?; + + // Lookup + let results = contents.lookup( + &["hello"], + &odict::lookup::LookupOptions::default(), + )?; + + println!("{:?}", results); + Ok(()) +} +``` + +## Key traits and types + +| Type | Description | 
+|------|-------------|
+| `OpenDictionary` | A compiled dictionary loaded from disk or bytes |
+| `ToDictionary` | Trait for converting XML strings to `Dictionary` |
+| `Dictionary` | The deserialized dictionary schema type |
+| `CompilerOptions` | Options for compiling (compression settings) |
+| `lookup::LookupOptions` | Options for exact-match lookups |
+| `search::SearchOptions` | Options for full-text search |
+| `index::IndexOptions` | Options for creating a search index |
+| `tokenize::TokenizeOptions` | Options for text tokenization |
+
+Refer to the [docs.rs documentation](https://docs.rs/odict) for complete details on all types, traits, and methods.
diff --git a/docs/src/content/docs/cli/reference.md b/docs/src/content/docs/cli/reference.md
new file mode 100644
index 00000000..641bdc26
--- /dev/null
+++ b/docs/src/content/docs/cli/reference.md
@@ -0,0 +1,391 @@
+---
+title: CLI Reference
+description: Complete reference for the ODict command-line interface.
+---
+
+<!-- This file is auto-generated by scripts/generate-cli-docs.mjs — do not edit manually. -->
+
+```
+odict [OPTIONS]
+```
+
+The ODict CLI is the primary tool for creating, compiling, and querying ODict dictionaries.
+
+## Global options
+
+| Option | Description |
+|--------|-------------|
+| `-q, --quiet` | Silence any non-important output |
+| `-h, --help` | Print help |
+| `-V, --version` | Print version |
+
+---
+
+## Commands
+
+### `odict compile`
+
+Compiles a dictionary from ODXML.
+
+```
+odict compile [-o ] [-q ] [-w ]
+```
+
+#### Arguments
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `input` | Yes | Path to ODXML file |
+
+#### Options
+
+| Flag | Description |
+|------|-------------|
+| `-o` | Output path of compiled dictionary |
+| `-q` | Brotli compression level (between 0 and 11) (default: `8`) |
+| `-w` | Brotli large window size (between 0 and 22) (default: `22`) |
+
+---
+
+### `odict download`
+
+Downloads a dictionary from the remote registry.
+
+```
+odict download [-o ] [--no-cache]
+```
+
+#### Arguments
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `dictionary` | Yes | Dictionary to download (e.g., 'wiktionary/eng') |
+
+#### Options
+
+| Flag | Description |
+|------|-------------|
+| `-o, --output` | Directory to download the dictionary to (defaults to config directory) |
+| `--no-cache` | Disable caching (always download fresh copy) (default: `false`) |
+| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) |
+
+---
+
+### `odict dump`
+
+Outputs a dictionary in a human-readable format.
+
+```
+odict dump [-f] [-o ]
+```
+
+#### Arguments
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `input` | Yes | Path to a compiled dictionary |
+
+#### Options
+
+| Flag | Description |
+|------|-------------|
+| `-f` | Format in which to dump the dictionary. (default: `xml`) |
+| `-o` | Output path of the dump. Defaults to stdout. |
+| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) |
+
+---
+
+### `odict index`
+
+Creates a full-text index of a compiled dictionary.
+ +``` +odict index [-d ] [-f] [-m ] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionary` | Yes | Path to a compiled dictionary or an alias | + +#### Options + +| Flag | Description | +|------|-------------| +| `-d` | Custom directory to store the index | +| `-f` | Whether to overwrite the index if it already exists (default: `false`) | +| `-m` | Memory arena per thread in bytes. Must be above 15MB. (default: `15000000`) | +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict info` + +Prints the metadata info for a dictionary file. + +``` +odict info +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionary_path` | Yes | Path to a compiled dictionary | + +#### Options + +| Flag | Description | +|------|-------------| +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict lexicon` + +Lists all words defined in a dictionary. + +``` +odict lexicon +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionary` | Yes | Path to a compiled dictionary | + +#### Options + +| Flag | Description | +|------|-------------| +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict lookup` + +Looks up an entry in a compiled dictionary without indexing. 
+ +``` +odict lookup [-f ] [-F] [-s ] [-i] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionary_path` | Yes | Path to a compiled dictionary | +| `queries` | Yes | Words to look up | + +#### Options + +| Flag | Description | +|------|-------------| +| `-f, --format` | Output format of the entries (default: `print`) | +| `-F, --follow` | Follow see_also redirects until finding an entry with etymologies | +| `-s, --split` | If a definition cannot be found, attempt to split the query into words of at least length S and look up each word separately. Can be relatively slow. (default: `0`) | +| `-i, --insensitive` | Perform case-insensitive lookups (default: `false`) | +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict merge` + +Merge entries from multiple dictionaries into a destination dictionary. + +``` +odict merge [-o ] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `destination` | Yes | Path of the dictionary to merge into (unless --output is specified) | +| `sources` | Yes | Paths of dictionaries to merge | + +#### Options + +| Flag | Description | +|------|-------------| +| `-o, --output` | Separate output path for the compiled dictionary | +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict new` + +Scaffolds a new ODict XML dictionary. + +``` +odict new [-n ] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `file_name` | Yes | Name of your new dictionary file | + +#### Options + +| Flag | Description | +|------|-------------| +| `-n` | Name attribute of the dictionary element | + +--- + +### `odict search` + +Run a full-text query on a compiled dictionary. 
+ +``` +odict search [-f] [--index] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionary` | Yes | Path to a compiled dictionary or an alias | +| `query` | Yes | Search query | + +#### Options + +| Flag | Description | +|------|-------------| +| `-f, --format` | Format in which to print the results (default: `json`) | +| `--index` | Creates a new index if one doesn't already exist (default: `false`) | +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict serve` + +Start a local web server to serve one or several dictionaries. + +``` +odict serve [dictionaries...] [-p ] [-c ] [-l] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionaries` | No | | + +#### Options + +| Flag | Description | +|------|-------------| +| `-p` | Port to listen on (default: `5005`) | +| `-c, --capacity` | Maximum number of dictionaries to keep in memory (default: `5`) | +| `-l, --level` | | + +#### HTTP endpoints + +When running `odict serve`, the following REST endpoints become available. All return JSON. + +##### `GET /{name}/lookup` + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `q` | string | Yes | | +| `follow` | boolean | No | | +| `split` | number | No | | + +##### `GET /{name}/search` + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `q` | string | Yes | | +| `limit` | number | No | | + +##### `GET /{name}/tokenize` + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `text` | string | Yes | | +| `follow` | boolean | No | | + +--- + +### `odict tokenize` + +Tokenize text and find dictionary entries for each token. 
+ +``` +odict tokenize [-f ] [-F] [-i] +``` + +#### Arguments + +| Argument | Required | Description | +|----------|----------|-------------| +| `dictionary_path` | Yes | Path to a compiled dictionary | +| `text` | Yes | Text to tokenize | + +#### Options + +| Flag | Description | +|------|-------------| +| `-f, --format` | Output format of the entries (default: `print`) | +| `-F, --follow` | Follow see_also redirects until finding an entry with etymologies | +| `-i, --insensitive` | Perform case-insensitive lookups when matching tokens (default: `false`) | +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +--- + +### `odict alias` + +Manage dictionary aliases. + +#### `odict alias add` + +Attempts to create a new dictionary alias, failing if one already exists with the given name. + +``` +odict alias add +``` + +| Argument | Required | Description | +|----------|----------|-------------| +| `name` | Yes | Name of the alias | +| `path` | Yes | Dictionary path | + +| Flag | Description | +|------|-------------| +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +#### `odict alias set` + +Creates or updates an existing dictionary alias. + +``` +odict alias set +``` + +| Argument | Required | Description | +|----------|----------|-------------| +| `name` | Yes | Name of the alias | +| `path` | Yes | Dictionary path | + +| Flag | Description | +|------|-------------| +| `-r, --retries` | Number of times to retry loading the dictionary (remote-only) (default: `3`) | + +#### `odict alias delete` + +Deletes an alias with the given name if it exists. 
+ +``` +odict alias delete +``` + +| Argument | Required | Description | +|----------|----------|-------------| +| `name` | Yes | Name of the alias | + +--- diff --git a/docs/src/content/docs/getting-started/installation.md b/docs/src/content/docs/getting-started/installation.md new file mode 100644 index 00000000..61f80d3d --- /dev/null +++ b/docs/src/content/docs/getting-started/installation.md @@ -0,0 +1,71 @@ +--- +title: Installation +description: How to install the ODict CLI and language bindings. +--- + +## CLI + +### Homebrew (macOS) + +```bash +brew install TheOpenDictionary/odict/odict +``` + +### Shell installer (macOS / Linux) + +```bash +curl --proto '=https' --tlsv1.2 -LsSf https://github.com/TheOpenDictionary/odict/releases/latest/download/odict-installer.sh | sh +``` + +### PowerShell installer (Windows) + +```powershell +powershell -ExecutionPolicy ByPass -c "irm https://github.com/TheOpenDictionary/odict/releases/latest/download/odict-installer.ps1 | iex" +``` + +### From source + +Requires [Rust](https://rustup.rs/) 1.75+. + +```bash +git clone https://github.com/TheOpenDictionary/odict.git +cd odict +cargo install --path cli +``` + +### Verify installation + +```bash +odict --version +``` + +--- + +## Language bindings + +### Python + +```bash +pip install theopendictionary +``` + +Requires Python 3.8.1+. See the [Python API docs](/api/python/) for usage. + +### JavaScript (Node.js) + +```bash +npm install @odict/node +``` + +Requires Node.js 12+. The package includes native binaries for all major platforms. See the [JavaScript API docs](/api/javascript/) for usage. + +### Rust + +Add the crate to your `Cargo.toml`: + +```toml +[dependencies] +odict = "2" +``` + +See the [Rust API docs](/api/rust/) for usage and feature flags. 
diff --git a/docs/src/content/docs/getting-started/introduction.md b/docs/src/content/docs/getting-started/introduction.md new file mode 100644 index 00000000..c5d86a2c --- /dev/null +++ b/docs/src/content/docs/getting-started/introduction.md @@ -0,0 +1,42 @@ +--- +title: Introduction +description: What is ODict and why does it exist? +--- + +**ODict** (The Open Dictionary) is a blazingly-fast, open-source dictionary file format designed for human languages. It provides a complete pipeline for defining, compiling, and querying dictionaries: + +1. **Define** your dictionary entries in a simple XML format (ODXML) +2. **Compile** the XML into a compact binary `.odict` file +3. **Query** the compiled dictionary using exact lookups, full-text search, or multi-language tokenization + +## Why ODict? + +Most dictionary data is locked in proprietary formats, scattered across inconsistent APIs, or stored in slow, bloated files. ODict addresses these problems: + +- **Universal schema** — A single, well-defined XML schema that can represent dictionaries for any human language, including etymologies, multiple senses, pronunciations, examples, and cross-references. +- **Fast binary format** — Compiled `.odict` files use [rkyv](https://rkyv.org/) for zero-copy deserialization and Brotli compression, making lookups extremely fast even on large dictionaries. +- **Full-text search** — Built-in indexing and search powered by [Tantivy](https://github.com/quickwit-oss/tantivy). +- **Multi-language tokenization** — Tokenize text in Chinese, Japanese, Korean, Thai, Khmer, German, Swedish, and Latin-script languages, and automatically match tokens to dictionary entries. +- **Cross-platform bindings** — Use ODict from Rust, Python, JavaScript (Node.js and browser), or through the CLI and HTTP server. 
+ +## Architecture + +``` +┌─────────────┐ ┌──────────┐ ┌─────────────┐ +│ ODXML file │────▶│ Compiler │────▶│ .odict file │ +│ (XML) │ │ │ │ (binary) │ +└─────────────┘ └──────────┘ └──────┬──────┘ + │ + ┌───────────────────────┬┴──────────────────────┐ + │ │ │ + ┌─────▼─────┐ ┌──────▼──────┐ ┌──────▼──────┐ + │ Lookup │ │ Search │ │ Tokenize │ + │ (exact key)│ │ (full-text) │ │ (NLP-based) │ + └───────────┘ └─────────────┘ └─────────────┘ +``` + +## What's next? + +- [Install the CLI](/getting-started/installation/) to start working with dictionaries +- [Quick Start](/getting-started/quickstart/) walks you through creating and compiling your first dictionary +- Browse the [XML Schema Reference](/schema/reference/) to learn the full data model diff --git a/docs/src/content/docs/getting-started/quickstart.md b/docs/src/content/docs/getting-started/quickstart.md new file mode 100644 index 00000000..afc1023f --- /dev/null +++ b/docs/src/content/docs/getting-started/quickstart.md @@ -0,0 +1,170 @@ +--- +title: Quick Start +description: Create, compile, and query your first ODict dictionary. +--- + +This guide walks you through creating a simple dictionary, compiling it, and querying it with the CLI. + +## 1. Create a new dictionary + +Use the `odict new` command to scaffold a blank XML file: + +```bash +odict new animals -n "Animal Dictionary" +``` + +This creates `animals.xml`: + +```xml + + + +``` + +## 2. Add entries + +Open `animals.xml` and add some entries: + +```xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +``` + +:::tip +The `see` attribute creates a cross-reference. When you look up "kitty", ODict can follow it to the "cat" entry. +::: + +## 3. Compile the dictionary + +```bash +odict compile animals.xml +``` + +This produces `animals.odict` — a compact binary file. 
You can inspect it with:
+
+```bash
+odict info animals.odict
+```
+
+```
+Animal Dictionary
+─────────────────
+
+File Version: 3
+File Size: 312 B
+Entries: 3
+```
+
+## 4. Look up entries
+
+```bash
+odict lookup animals.odict cat
+```
+
+Output:
+
+```
+cat (From Latin cattus)
+
+  noun
+    1. A small domesticated carnivorous mammal with soft fur
+       • "The cat sat on the mat."
+       • "She adopted two cats from the shelter."
+    2. (informal) A person, especially a man
+       • "He's a cool cat."
+```
+
+### Follow cross-references
+
+```bash
+odict lookup animals.odict kitty -F
+```
+
+This follows the `see="cat"` redirect and returns the "cat" entry.
+
+### JSON output
+
+```bash
+odict lookup animals.odict cat -f json
+```
+
+Returns full structured JSON, useful for integration with other tools.
+
+## 5. Full-text search
+
+To search across all definitions, first create an index:
+
+```bash
+odict index animals.odict
+```
+
+Then search:
+
+```bash
+odict search animals.odict "domesticated mammal"
+```
+
+This returns all entries whose definitions match the query.
+
+:::note
+You can also pass `--index` to `odict search` to auto-create the index on the fly.
+:::
+
+## 6. Serve over HTTP
+
+Start a local server to query dictionaries via REST:
+
+```bash
+odict serve animals.odict -p 8080
+```
+
+Then query from any HTTP client:
+
+```bash
+# Lookup
+curl "http://localhost:8080/animals/lookup?q=cat,dog"
+
+# Search
+curl "http://localhost:8080/animals/search?q=domesticated"
+
+# Tokenize
+curl "http://localhost:8080/animals/tokenize?text=the+cat+and+the+dog"
+```
+
+## What's next?
+ +- [XML Schema Reference](/schema/reference/) — learn the full XML format including pronunciations, notes, and groups +- [CLI Reference](/cli/reference/) — complete command-line documentation +- Language bindings: [Python](/api/python/), [JavaScript](/api/javascript/), [Rust](/api/rust/) diff --git a/docs/src/content/docs/guides/compiling.mdx b/docs/src/content/docs/guides/compiling.mdx new file mode 100644 index 00000000..7b28d195 --- /dev/null +++ b/docs/src/content/docs/guides/compiling.mdx @@ -0,0 +1,242 @@ +--- +title: Compiling Dictionaries +description: How to compile ODict dictionaries programmatically from Rust, Python, and JavaScript. +--- + +import { Tabs, TabItem } from "@astrojs/starlight/components"; + +This guide shows how to compile ODXML into `.odict` binary files programmatically. For CLI usage, see the [Quick Start](/getting-started/quickstart/). + +## Compiling from an XML string + +The simplest approach is to compile an XML string directly into an in-memory dictionary. 
+
+<Tabs>
+<TabItem label="Rust">
+```rust
+use odict::{OpenDictionary, ToDictionary};
+
+fn main() -> odict::Result<()> {
+    let xml = r#"
+        <dictionary>
+          <entry term="hello">
+            <ety>
+              <sense pos="intj">
+                <definition value="A standard greeting" />
+              </sense>
+            </ety>
+          </entry>
+        </dictionary>
+    "#;
+
+    // Parse XML → build binary → get OpenDictionary
+    let dict = xml.to_dictionary()?.build()?;
+
+    // Write to disk
+    dict.to_disk("my-dictionary.odict")?;
+
+    Ok(())
+}
+```
+</TabItem>
+<TabItem label="Python">
+```python
+from theopendictionary import OpenDictionary, compile
+
+xml = """
+<dictionary>
+  <entry term="hello">
+    <ety>
+      <sense pos="intj">
+        <definition value="A standard greeting" />
+      </sense>
+    </ety>
+  </entry>
+</dictionary>
+"""
+
+# Option 1: compile() returns raw bytes
+compiled_bytes = compile(xml)
+dictionary = OpenDictionary(compiled_bytes)
+
+# Option 2: pass XML directly to the constructor
+dictionary = OpenDictionary(xml)
+
+# Save to disk
+dictionary.save("my-dictionary.odict")
+```
+</TabItem>
+<TabItem label="JavaScript">
+```typescript
+import { compile, OpenDictionary } from "@odict/node";
+
+const xml = `
+<dictionary>
+  <entry term="hello">
+    <ety>
+      <sense pos="intj">
+        <definition value="A standard greeting" />
+      </sense>
+    </ety>
+  </entry>
+</dictionary>
+`;
+
+// Option 1: compile() returns a Buffer
+const data = compile(xml);
+let dictionary = new OpenDictionary(data);
+
+// Option 2: pass XML directly to the constructor
+dictionary = new OpenDictionary(xml);
+
+// Save to disk
+dictionary.save("my-dictionary.odict");
+```
+</TabItem>
+</Tabs>
+
+## Compiling from an XML file
+
+If your XML lives on disk, read it first and then compile.
+
+<Tabs>
+<TabItem label="Rust">
+```rust
+use odict::schema::Dictionary;
+
+fn main() -> odict::Result<()> {
+    // Parse and compile from a file path
+    let dict = Dictionary::from_path("my-dictionary.xml")?
+ .build()?; + + dict.to_disk("my-dictionary.odict")?; + + Ok(()) +} +``` + + +```python +from theopendictionary import OpenDictionary, compile + +with open("my-dictionary.xml", "r") as f: + xml = f.read() + +compiled_bytes = compile(xml) +dictionary = OpenDictionary(compiled_bytes) +dictionary.save("my-dictionary.odict") +``` + + +```typescript +import { readFile } from "node:fs/promises"; +import { compile, OpenDictionary } from "@odict/node"; + +const xml = await readFile("my-dictionary.xml", "utf-8"); +const data = compile(xml); +const dictionary = new OpenDictionary(data); +dictionary.save("my-dictionary.odict"); +``` + + + +## Compression options + +ODict uses Brotli compression. You can configure the compression level when saving. + + + +```rust +use odict::{compile::CompilerOptions, CompressOptions, ToDictionary}; + +fn main() -> odict::Result<()> { + let xml = std::fs::read_to_string("my-dictionary.xml")?; + + let compress = CompressOptions::default() + .quality(11) // Maximum compression (0–11) + .window_size(22); // Window size (0–22) + + let options = CompilerOptions::default() + .with_compression(compress); + + xml.as_str() + .to_dictionary()? + .build()? + .to_disk_with_options("my-dictionary.odict", options)?; + + Ok(()) +} +``` + + +```python +dictionary.save( + "my-dictionary.odict", + quality=11, # Maximum compression (0–11) + window_size=22 # Window size (0–22) +) +``` + + +```typescript +dictionary.save("my-dictionary.odict", { + compress: { + quality: 11, // Maximum compression (0–11) + windowSize: 22, // Window size (0–22) + }, +}); +``` + + + +## Loading compiled dictionaries + +Once compiled, you can load `.odict` files from disk or from the remote registry. 
+
+<Tabs>
+<TabItem label="Rust">
+```rust
+use odict::OpenDictionary;
+
+fn main() -> odict::Result<()> {
+    // Load from disk
+    let file = OpenDictionary::from_path("my-dictionary.odict")?;
+    let dict = file.contents()?;
+
+    println!("Entries: {}", dict.entries.len());
+
+    Ok(())
+}
+```
+</TabItem>
+<TabItem label="Python">
+```python
+import asyncio
+from theopendictionary import OpenDictionary
+
+async def main():
+    # Load from disk
+    dictionary = await OpenDictionary.load("./my-dictionary.odict")
+
+    # Load from remote registry
+    dictionary = await OpenDictionary.load("wiktionary/eng")
+
+    print(dictionary.lexicon())
+
+asyncio.run(main())
+```
+</TabItem>
+<TabItem label="JavaScript">
+```typescript
+import { OpenDictionary } from "@odict/node";
+
+// Load from disk
+let dictionary = await OpenDictionary.load("./my-dictionary.odict");
+
+// Load from remote registry
+dictionary = await OpenDictionary.load("wiktionary/eng");
+
+console.log(dictionary.lexicon());
+```
+</TabItem>
+</Tabs>
diff --git a/docs/src/content/docs/guides/lookup.mdx b/docs/src/content/docs/guides/lookup.mdx
new file mode 100644
index 00000000..cbcea0aa
--- /dev/null
+++ b/docs/src/content/docs/guides/lookup.mdx
@@ -0,0 +1,312 @@
+---
+title: Looking Up Entries
+description: How to look up dictionary entries by exact match from Rust, Python, and JavaScript.
+---
+
+import { Tabs, TabItem } from "@astrojs/starlight/components";
+
+Lookup is the fastest way to query a dictionary — it finds entries by exact term match without requiring an index.
+ +## Basic lookup + + + +```rust +use odict::{OpenDictionary, lookup::LookupOptions}; + +fn main() -> odict::Result<()> { + let file = OpenDictionary::from_path("my-dictionary.odict")?; + let dict = file.contents()?; + + let results = dict.lookup( + &vec!["cat"], + LookupOptions::default(), + )?; + + for result in &results { + println!("{}", result.entry.term.as_str()); + } + + Ok(()) +} +``` + + +```python +from theopendictionary import OpenDictionary, compile + +dictionary = OpenDictionary("...") + +results = dictionary.lookup("cat") +print(results[0].entry.term) # "cat" +``` + + +```typescript +import { OpenDictionary } from "@odict/node"; + +const dictionary = await OpenDictionary.load("./my-dictionary.odict"); + +const results = dictionary.lookup("cat"); +console.log(results[0].entry.term); // "cat" +``` + + + +## Looking up multiple terms + +You can look up several terms in a single call. Results are returned for each matched term. + + + +```rust +let results = dict.lookup( + &vec!["cat", "dog", "run"], + LookupOptions::default(), +)?; + +for result in &results { + println!("Found: {}", result.entry.term.as_str()); +} +``` + + +```python +results = dictionary.lookup(["cat", "dog", "run"]) + +for result in results: + print(f"Found: {result.entry.term}") +``` + + +```typescript +const results = dictionary.lookup(["cat", "dog", "run"]); + +for (const result of results) { + console.log(`Found: ${result.entry.term}`); +} +``` + + + +## Following cross-references + +Entries can redirect to other entries using the `see` attribute (e.g. "ran" → "run"). Enable `follow` to automatically resolve these. 
+ + + +```rust +use odict::lookup::LookupOptions; + +let options = LookupOptions::default().follow(true); + +let results = dict.lookup(&vec!["ran"], options)?; + +// "ran" redirects to "run" +assert_eq!(results[0].entry.term.as_str(), "run"); + +// directed_from tells you the original entry +if let Some(from) = &results[0].directed_from { + println!("Redirected from: {}", from.term.as_str()); +} +``` + + +```python +results = dictionary.lookup("ran", follow=True) + +# "ran" redirects to "run" +print(results[0].entry.term) # "run" +print(results[0].directed_from.term) # "ran" +``` + + +```typescript +const results = dictionary.lookup("ran", { follow: true }); + +// "ran" redirects to "run" +console.log(results[0].entry.term); // "run" +console.log(results[0].directedFrom?.term); // "ran" +``` + + + +:::tip +When `follow` is enabled, ODict walks the `see` chain until it finds an entry with etymologies. It also detects circular references and returns an error instead of looping infinitely. +::: + +## Case-insensitive lookup + +By default, lookups are case-sensitive. Enable `insensitive` to fall back to lowercase matching when the exact case doesn't match. + + + +```rust +let options = LookupOptions::default().insensitive(true); + +// "CAT" will match "cat" +let results = dict.lookup(&vec!["CAT"], options)?; + +assert_eq!(results[0].entry.term.as_str(), "cat"); +``` + + +```python +# "CAT" will match "cat" +results = dictionary.lookup("CAT", insensitive=True) + +print(results[0].entry.term) # "cat" +``` + + +```typescript +// "CAT" will match "cat" +const results = dictionary.lookup("CAT", { insensitive: true }); + +console.log(results[0].entry.term); // "cat" +``` + + + +## Compound word splitting + +If a term isn't found, ODict can split it into substrings and look up each part. The `split` parameter sets the minimum character length for each fragment. 
+ + + +```rust +use odict::lookup::{LookupOptions, LookupStrategy}; + +let options = LookupOptions::default() + .strategy(LookupStrategy::Split(3)); + +// "catdog" isn't a word, but "cat" and "dog" are +let results = dict.lookup(&vec!["catdog"], options)?; + +for result in &results { + println!("Found: {}", result.entry.term.as_str()); +} +// Prints: "cat", "dog" +``` + + +```python +# "catdog" isn't a word, but "cat" and "dog" are +results = dictionary.lookup("catdog", split=3) + +for result in results: + print(result.entry.term) +# Prints: "cat", "dog" +``` + + +```typescript +// "catdog" isn't a word, but "cat" and "dog" are +const results = dictionary.lookup("catdog", { split: 3 }); + +for (const result of results) { + console.log(result.entry.term); +} +// Prints: "cat", "dog" +``` + + + +## Combining options + +All lookup options can be combined. + + + +```rust +let options = LookupOptions::default() + .follow(true) + .insensitive(true) + .strategy(LookupStrategy::Split(3)); + +let results = dict.lookup(&vec!["RaN"], options)?; +``` + + +```python +results = dictionary.lookup("RaN", follow=True, insensitive=True, split=3) +``` + + +```typescript +const results = dictionary.lookup("RaN", { + follow: true, + insensitive: true, + split: 3, +}); +``` + + + +## Reading entry data + +Once you have a `LookupResult`, you can traverse the entry's structure: etymologies, senses, definitions, examples, and more. + + + +```python +results = dictionary.lookup("cat") +entry = results[0].entry + +print(f"Term: {entry.term}") + +for ety in entry.etymologies: + for sense in ety.senses: + print(f" Part of speech: {sense.pos}") + for defn in sense.definitions: + print(f" {defn.value}") + for example in defn.examples: + print(f" e.g. 
{example.value}") +``` + + +```typescript +const results = dictionary.lookup("cat"); +const entry = results[0].entry; + +console.log(`Term: ${entry.term}`); + +for (const ety of entry.etymologies) { + for (const sense of ety.senses) { + console.log(` Part of speech: ${sense.pos.value}`); + for (const defn of sense.definitions) { + if ("value" in defn) { + console.log(` ${defn.value}`); + for (const example of defn.examples) { + console.log(` e.g. ${example.value}`); + } + } + } + } +} +``` + + +```rust +let results = dict.lookup(&vec!["cat"], LookupOptions::default())?; + +for result in &results { + let entry = result.entry.deserialize()?; + + println!("Term: {}", entry.term); + + for ety in &entry.etymologies { + for (pos, sense) in &ety.senses { + println!(" Part of speech: {}", pos); + for defn in &sense.definitions { + println!(" {}", defn.value); + for example in &defn.examples { + println!(" e.g. {}", example.value); + } + } + } + } +} +``` + + diff --git a/docs/src/content/docs/guides/search.mdx b/docs/src/content/docs/guides/search.mdx new file mode 100644 index 00000000..86d64c80 --- /dev/null +++ b/docs/src/content/docs/guides/search.mdx @@ -0,0 +1,176 @@ +--- +title: Searching Dictionaries +description: How to index and run full-text searches on ODict dictionaries from Rust, Python, and JavaScript. +--- + +import { Tabs, TabItem } from "@astrojs/starlight/components"; + +Full-text search lets you find entries by matching against their definitions, not just their headwords. Unlike [lookup](/guides/lookup/) which requires an exact term match, search uses a [Tantivy](https://github.com/quickwit-oss/tantivy)-powered full-text index. + +## Creating an index + +Before you can search, you need to create a full-text index. This only needs to be done once per dictionary (the index is persisted to disk). 
+ + + +```rust +use odict::{OpenDictionary, index::IndexOptions}; + +fn main() -> odict::Result<()> { + let file = OpenDictionary::from_path("my-dictionary.odict")?; + let dict = file.contents()?; + + dict.index(IndexOptions::default())?; + + Ok(()) +} +``` + + +```python +from theopendictionary import OpenDictionary + +dictionary = await OpenDictionary.load("./my-dictionary.odict") +dictionary.index() +``` + + +```typescript +import { OpenDictionary } from "@odict/node"; + +const dictionary = await OpenDictionary.load("./my-dictionary.odict"); +dictionary.index(); +``` + + + +## Index options + +You can configure the indexing behavior. + + + +```rust +use odict::index::IndexOptions; + +let options = IndexOptions::default() + .dir("./my-index") // Custom index directory + .overwrite(true) // Overwrite existing index + .memory(50_000_000); // 50MB memory arena per thread + +dict.index(options)?; +``` + + +```python +from theopendictionary import IndexOptions + +dictionary.index(IndexOptions( + directory="./my-index", # Custom index directory + overwrite=True, # Overwrite existing index + memory=50_000_000 # 50MB memory arena per thread +)) +``` + + +```typescript +dictionary.index({ + directory: "./my-index", // Custom index directory + overwrite: true, // Overwrite existing index + memory: 50_000_000, // 50MB memory arena per thread +}); +``` + + + +## Running a search + +Once indexed, you can search across all definitions in the dictionary. 
+ + + +```rust +use odict::search::SearchOptions; + +let results = dict.search("domesticated mammal", SearchOptions::default())?; + +for entry in &results { + println!("{}", entry.term); +} +``` + + +```python +results = dictionary.search("domesticated mammal") + +for entry in results: + print(entry.term) +``` + + +```typescript +const results = dictionary.search("domesticated mammal"); + +for (const entry of results) { + console.log(entry.term); +} +``` + + + +## Search options + + + +```rust +use odict::search::SearchOptions; + +let options = SearchOptions::default() + .dir("./my-index") // Custom index directory + .autoindex(true) // Auto-create index if missing + .limit(10) // Max results to return + .threshold(50); // Relevance threshold + +let results = dict.search("greeting", options)?; +``` + + +```python +from theopendictionary import SearchOptions + +results = dictionary.search("greeting", SearchOptions( + directory="./my-index", # Custom index directory + autoindex=True, # Auto-create index if missing + limit=10, # Max results to return + threshold=50 # Relevance threshold +)) +``` + + +```typescript +const results = dictionary.search("greeting", { + directory: "./my-index", // Custom index directory + autoindex: true, // Auto-create index if missing + limit: 10, // Max results to return + threshold: 50, // Relevance threshold +}); +``` + + + +:::tip +The `autoindex` option is convenient for one-off scripts — it creates the index on the fly if one doesn't exist yet. For production use, create the index ahead of time with `index()` to avoid the startup cost on first search. +::: + +## Search vs. 
lookup + +| | Lookup | Search | +|---|--------|--------| +| **Matches against** | Entry terms (headwords) | Definition text | +| **Requires index** | No | Yes | +| **Speed** | O(1) per term | Depends on index size | +| **Use case** | You know the exact word | You're searching by meaning | +| **Supports splitting** | Yes | No | +| **Supports follow** | Yes | No | + +In most applications you'll use both: lookup for direct dictionary access, and search for discovery. diff --git a/docs/src/content/docs/guides/tokenize.mdx b/docs/src/content/docs/guides/tokenize.mdx new file mode 100644 index 00000000..22df8303 --- /dev/null +++ b/docs/src/content/docs/guides/tokenize.mdx @@ -0,0 +1,228 @@ +--- +title: Tokenizing Text +description: How to tokenize text and match tokens against dictionary entries using ODict's NLP tokenizer. +--- + +import { Tabs, TabItem } from "@astrojs/starlight/components"; + +ODict includes a built-in NLP tokenizer that segments text into words and automatically matches each token against dictionary entries. This is especially useful for languages without whitespace-delimited words (Chinese, Japanese, Korean, Thai, Khmer) as well as compound-word languages (German, Swedish). + +## Supported languages + +| Language family | Languages | Tokenizer | +|----------------|-----------|-----------| +| Chinese | Simplified & Traditional Chinese | jieba | +| Japanese | Japanese | Lindera (UniDic) | +| Korean | Korean | Lindera (KoDic) | +| Thai | Thai | ICU-based | +| Khmer | Khmer | ICU-based | +| Germanic | German, Swedish | Compound word splitting | +| Latin-script | English, French, Spanish, etc. 
| Unicode word boundaries | + +## Basic tokenization + + + +```rust +use odict::{OpenDictionary, tokenize::TokenizeOptions}; + +fn main() -> odict::Result<()> { + let file = OpenDictionary::from_path("my-dictionary.odict")?; + let dict = file.contents()?; + + let tokens = dict.tokenize( + "the cat ran", + TokenizeOptions::default(), + )?; + + for token in &tokens { + println!("'{}' ({} entries found)", + token.lemma, + token.entries.len() + ); + } + + Ok(()) +} +``` + + +```python +from theopendictionary import OpenDictionary + +dictionary = OpenDictionary("...") + +tokens = dictionary.tokenize("the cat ran") + +for token in tokens: + print(f"'{token.lemma}' ({len(token.entries)} entries found)") +``` + + +```typescript +import { OpenDictionary } from "@odict/node"; + +const dictionary = await OpenDictionary.load("./my-dictionary.odict"); + +const tokens = dictionary.tokenize("the cat ran"); + +for (const token of tokens) { + console.log(`'${token.lemma}' (${token.entries.length} entries found)`); +} +``` + + + +## Chinese text tokenization + +For Chinese (and other CJK languages), ODict automatically detects the script and uses the appropriate segmenter. + + + +```rust +let tokens = dict.tokenize("你好世界", TokenizeOptions::default())?; + +for token in &tokens { + println!("Lemma: {}, Script: {:?}, Language: {:?}", + token.lemma, + token.script.name(), + token.language.as_ref().map(|l| l.code()) + ); +} +``` + + +```python +tokens = dictionary.tokenize("你好世界") + +for token in tokens: + print(f"Lemma: {token.lemma}, Script: {token.script}, Language: {token.language}") +``` + + +```typescript +const tokens = dictionary.tokenize("你好世界"); + +for (const token of tokens) { + console.log(`Lemma: ${token.lemma}, Script: ${token.script}, Language: ${token.language}`); +} +``` + + + +## Following cross-references + +Like [lookup](/guides/lookup/), tokenization supports following `see` cross-references. 
+ + + +```rust +let options = TokenizeOptions::default().follow(true); + +let tokens = dict.tokenize("the cat ran", options)?; + +for token in &tokens { + for result in &token.entries { + if let Some(from) = &result.directed_from { + println!("'{}' → '{}'", + from.term.as_str(), + result.entry.term.as_str() + ); + } + } +} +// e.g. 'ran' → 'run' +``` + + +```python +tokens = dictionary.tokenize("the cat ran", follow=True) + +for token in tokens: + for result in token.entries: + if result.directed_from: + print(f"'{result.directed_from.term}' → '{result.entry.term}'") +# e.g. 'ran' → 'run' +``` + + +```typescript +const tokens = dictionary.tokenize("the cat ran", { follow: true }); + +for (const token of tokens) { + for (const result of token.entries) { + if (result.directedFrom) { + console.log(`'${result.directedFrom.term}' → '${result.entry.term}'`); + } + } +} +// e.g. 'ran' → 'run' +``` + + + +## Case-insensitive tokenization + + + +```rust +let options = TokenizeOptions::default().insensitive(true); + +// "DOG" will match the "dog" entry +let tokens = dict.tokenize("DOG cat", options)?; +``` + + +```python +# "DOG" will match the "dog" entry +tokens = dictionary.tokenize("DOG cat", insensitive=True) +``` + + +```typescript +// "DOG" will match the "dog" entry +const tokens = dictionary.tokenize("DOG cat", { insensitive: true }); +``` + + + +## Token properties + +Each token returned by `tokenize()` includes metadata about the match. + +| Property | Description | +|----------|-------------| +| `lemma` | The original text of the token as it appears in the input | +| `language` | Detected language code (e.g. `"cmn"` for Mandarin), if applicable | +| `script` | Detected script name (e.g. `"Han"`, `"Latin"`) | +| `kind` | Token kind (e.g. 
`"Word"`, `"Punctuation"`) | +| `start` | Start byte offset in the original text | +| `end` | End byte offset in the original text | +| `entries` | Array of `LookupResult` objects for matched dictionary entries | + +## Combining options + + + +```rust +let options = TokenizeOptions::default() + .follow(true) + .insensitive(true); + +let tokens = dict.tokenize("The CAT RaN away", options)?; +``` + + +```python +tokens = dictionary.tokenize("The CAT RaN away", follow=True, insensitive=True) +``` + + +```typescript +const tokens = dictionary.tokenize("The CAT RaN away", { + follow: true, + insensitive: true, +}); +``` + + diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx new file mode 100644 index 00000000..55838e4a --- /dev/null +++ b/docs/src/content/docs/index.mdx @@ -0,0 +1,38 @@ +--- +title: ODict +description: The lightning-fast open-source dictionary file format for human languages. +template: splash +hero: + title: ODict + tagline: The lightning-fast open-source dictionary file format for human languages. + actions: + - text: Get Started + link: /getting-started/introduction/ + icon: right-arrow + - text: View on GitHub + link: https://github.com/TheOpenDictionary/odict + icon: external + variant: minimal +--- + +import { Card, CardGrid } from "@astrojs/starlight/components"; + + + + Define your dictionary entries using a simple, well-documented XML schema + (ODXML) that supports etymologies, senses, definitions, examples, + pronunciations, and more. + + + Compile your XML dictionaries into compact, blazingly-fast binary `.odict` + files using zero-copy deserialization via rkyv and Brotli compression. + + + Index and search your compiled dictionaries with built-in full-text search + powered by Tantivy, with multi-language tokenization support. + + + Native bindings for Python, JavaScript (Node.js and browser via WASI), and + Rust. Plus a powerful CLI and HTTP server for language-agnostic access. 
+ + diff --git a/docs/src/content/docs/schema/overview.md b/docs/src/content/docs/schema/overview.md new file mode 100644 index 00000000..e309d380 --- /dev/null +++ b/docs/src/content/docs/schema/overview.md @@ -0,0 +1,174 @@ +--- +title: Schema Overview +description: An overview of the ODict XML (ODXML) schema and how dictionaries are structured. +--- + +ODict dictionaries are authored in XML using the **ODXML** (Open Dictionary XML) format. This page provides a conceptual overview of how the schema is structured. For the full element-by-element reference, see the [Schema Reference](/schema/reference/). + +## Structure + +An ODXML file describes a dictionary as a hierarchy: + +``` +dictionary +└── entry (one per headword) + ├── pronunciation (optional, entry-level) + └── ety (etymology — groups senses by word origin) + └── sense (groups definitions by part of speech) + ├── group (optional grouping of definitions) + │ └── definition + │ ├── example + │ └── note + └── definition + ├── example + └── note +``` + +## Minimal example + +The simplest valid dictionary: + +```xml + + + + + + + + + +``` + +## Entries and cross-references + +Each `` represents a headword. Entries can either contain full definitions (via `` children) or redirect to another entry using the `see` attribute: + +```xml + + + + + + + + + + +``` + +When looking up "ran" with the `follow` option enabled, ODict will resolve the cross-reference and return the "run" entry. + +## Etymologies + +If a word has multiple distinct origins, you can define multiple `` elements: + +```xml + + + + + + + + + + + + +``` + +## Senses and parts of speech + +Within an etymology, `` elements group definitions by part of speech. The `pos` attribute accepts standard codes like `n` (noun), `v` (verb), `adj` (adjective), etc. See the [reference](/schema/reference/#parts-of-speech) for the full list. + +```xml + + + + + + +``` + +If the part of speech is unknown or not applicable, you can omit `pos` entirely. 
+
+## Definition groups
+
+When a sense has many definitions, you can organize them with `<group>`:
+
+```xml
+<sense pos="n">
+  <group description="Senses relating to animals">
+    <definition value="A small domesticated feline" />
+    <definition value="A wild animal of the cat family" />
+  </group>
+  <group description="Senses relating to people">
+    <definition value="A person, especially a jazz enthusiast" />
+  </group>
+</sense>
+```
+
+## Examples and notes
+
+Definitions can have `<example>` and `<note>` children:
+
+```xml
+<definition value="A small domesticated feline">
+  <example value="The cat sat on the mat." />
+  <note value="Often kept as a household pet.">
+    <example value="Our cat sleeps all day." />
+  </note>
+</definition>
+```
+
+## Pronunciations
+
+Pronunciations can be attached at the entry level and support any phonetic system:
+
+```xml
+<entry term="cat">
+  <pronunciation kind="ipa" value="/kæt/">
+    <url src="https://example.com/audio/cat.mp3" type="audio/mpeg" description="Recorded by a native speaker" />
+  </pronunciation>
+  <pronunciation kind="braille" value="⠉⠁⠞" />
+  <ety>
+    <sense pos="n">
+      <definition value="A small domesticated feline" />
+    </sense>
+  </ety>
+</entry>
+```
+
+This is especially useful for non-Latin scripts:
+
+```xml
+<entry term="猫">
+  <pronunciation kind="pinyin" value="māo" />
+  ...
+</entry>
+```
+
+## XSD validation
+
+The schema is formally defined in [`odict.xsd`](https://github.com/TheOpenDictionary/odict/blob/main/odict.xsd). You can validate your XML against it:
+
+```xml
+<dictionary
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:noNamespaceSchemaLocation="https://raw.githubusercontent.com/TheOpenDictionary/odict/main/odict.xsd">
+  ...
+</dictionary>
+```
+
+Most XML editors (VS Code with the XML extension, IntelliJ, etc.) will provide autocomplete and validation when the XSD is referenced.
diff --git a/docs/src/content/docs/schema/reference.md b/docs/src/content/docs/schema/reference.md
new file mode 100644
index 00000000..0d78b7c6
--- /dev/null
+++ b/docs/src/content/docs/schema/reference.md
@@ -0,0 +1,328 @@
+---
+title: XML Schema Reference
+description: Complete reference for the ODict XML (ODXML) schema.
+---
+
+<!-- This file is auto-generated by scripts/generate-schema-docs.mjs — do not edit manually. -->
+
+This page is automatically generated from [`odict.xsd`](https://github.com/TheOpenDictionary/odict/blob/main/odict.xsd) and [`pos.rs`](https://github.com/TheOpenDictionary/odict/blob/main/lib/src/schema/pos.rs).
+ +## Element hierarchy + +``` +dictionary +└── entry + ├── pronunciation + │ └── url + └── ety + └── sense + ├── group + │ └── definition + │ ├── example + │ │ └── pronunciation … + │ └── note + │ └── example … + └── definition … +``` + +--- + +## Elements + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `id` | `string` | No | +| `name` | `string` | No | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#entry) | 1 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `term` | `string` | Yes | +| `see` | `string` | No | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#pronunciation) | 0 | unbounded | +| [``](#ety) | 0 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `id` | `string` | No | +| `pronunciation` | `string` | No | +| `description` | `string` | No | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#sense) | 1 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `pos` | `string` | No | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#group) | 0 | unbounded | +| [``](#definition) | 0 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `id` | `string` | No | +| `description` | `string` | No | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#definition) | 1 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `id` | `string` | No | +| `value` | `string` | Yes | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#example) | 0 | unbounded | +| [``](#note) | 0 | unbounded | 
+ +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `id` | `string` | No | +| `value` | `string` | Yes | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#example) | 1 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `value` | `string` | Yes | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#pronunciation) | 0 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `kind` | `string` | Yes | +| `value` | `string` | Yes | + +#### Child elements + +| Element | Min | Max | +|---------|-----|-----| +| [``](#url) | 0 | unbounded | + +--- + +### `` + +#### Attributes + +| Attribute | Type | Required | +|-----------|------|----------| +| `src` | `string` | Yes | +| `type` | `string` | No | +| `description` | `string` | No | + +--- + +## Parts of speech + +The `pos` attribute on `` accepts the following values. You can also pass any custom string, which will be treated as a custom part of speech. 
+ +### Universal + +| Code | Label | +|------|-------| +| `art` | article | +| `abv` | abbreviation | +| `adf` | adfix | +| `adj` | adjective | +| `phr_adj` | adjective phrase | +| `adv` | adverb | +| `phr_adv` | adverbial phrase | +| `aff` | affix | +| `aux` | auxiliary | +| `aux_adj` | auxiliary adjective | +| `aux_v` | auxiliary verb | +| `chr` | character | +| `cf` | circumfix | +| `cls` | classifier | +| `conj` | conjunction | +| `conj_c` | coordinating conjunction | +| `contr` | contraction | +| `cop` | copula | +| `ctr` | counter | +| `det` | determiner | +| `expr` | expression | +| `inf` | infix | +| `intf` | interfix | +| `intj` | interjection | +| `vi` | intransitive verb | +| `name` | name | +| `n` | noun | +| `num` | numeric | +| `part` | particle | +| `phr` | phrase | +| `postp` | postposition | +| `pref` | prefix | +| `prep` | preposition | +| `phr_prep` | prepositional phrase | +| `pron` | pronoun | +| `propn` | proper noun | +| `prov` | proverb | +| `punc` | punctuation | +| `conj_s` | subordinating conjunction | +| `suff` | suffix | +| `sym` | symbol | +| `vt` | transitive verb | +| `un` | unknown | +| `v` | verb | + +### Japanese-specific + +| Code | Label | +|------|-------| +| `adj_pn` | pre-noun adjectival (rentaishi) | +| `adj_kari` | 'kari' adjective (archaic) | +| `adj_ku` | 'ku' adjective (archaic) | +| `adj_nari` | archaic/formal form of na-adjective | +| `adj_na` | adjectival nouns or quasi-adjectives (keiyodoshi) | +| `adj_shiku` | 'shiku' adjective (archaic) | +| `adj_t` | 'taru' adjective | +| `adj_ix` | adjective (keiyoushi) - yoi/ii class | +| `n_adv` | adverbial noun (fukushitekimeishi) | +| `adv_to` | adverb taking the 'to' particle | +| `adj_no` | nouns which may take the genitive case particle 'no' | +| `n_pref` | noun, used as a prefix | +| `n_suf` | noun, used as a suffix | +| `nt` | noun (temporal) (jisoumeishi) | +| `adj_f` | noun or verb acting prenominally | +| `v5b` | Godan verb with 'bu' ending | +| `v5g` | Godan verb 
with 'gu' ending | +| `v5k` | Godan verb with 'ku' ending | +| `v5m` | Godan verb with 'mu' ending | +| `v5n` | Godan verb with 'nu' ending | +| `v5r` | Godan verb with 'ru' ending | +| `v5r_i` | Godan verb with 'ru' ending (irregular verb) | +| `v5aru` | Godan verb - -aru special class | +| `v5k_s` | Godan verb - Iku/Yuku special class | +| `v5s` | Godan verb with 'su' ending | +| `v5t` | Godan verb with 'tsu' ending | +| `v5u` | Godan verb with 'u' ending | +| `v5uru` | Godan verb - Uru old class verb (old form of Eru) | +| `v5u_s` | Godan verb with 'u' ending (special class) | +| `v1` | Ichidan verb | +| `v1s` | Ichidan verb - kureru special class | +| `vz` | Ichidan verb - zuru verb (alternative form of -jiru verbs) | +| `vk` | Kuru verb - special class | +| `v2b_s` | Nidan verb (lower class) with 'bu' ending (archaic) | +| `v2b_k` | Nidan verb (upper class) with 'bu' ending (archaic) | +| `v2d_s` | Nidan verb (lower class) with 'dzu' ending (archaic) | +| `v2d_k` | Nidan verb (upper class) with 'dzu' ending (archaic) | +| `v2g_s` | Nidan verb (lower class) with 'gu' ending (archaic) | +| `v2g_k` | Nidan verb (upper class) with 'gu' ending (archaic) | +| `v2h_s` | Nidan verb (lower class) with 'hu/fu' ending (archaic) | +| `v2h_k` | Nidan verb (upper class) with 'hu/fu' ending (archaic) | +| `v2k_s` | Nidan verb (lower class) with 'ku' ending (archaic) | +| `v2k_k` | Nidan verb (upper class) with 'ku' ending (archaic) | +| `v2m_s` | Nidan verb (lower class) with 'mu' ending (archaic) | +| `v2m_k` | Nidan verb (upper class) with 'mu' ending (archaic) | +| `v2n_s` | Nidan verb (lower class) with 'nu' ending (archaic) | +| `v2r_s` | Nidan verb (lower class) with 'ru' ending (archaic) | +| `v2r_k` | Nidan verb (upper class) with 'ru' ending (archaic) | +| `v2s_s` | Nidan verb (lower class) with 'su' ending (archaic) | +| `v2t_s` | Nidan verb (lower class) with 'tsu' ending (archaic) | +| `v2t_k` | Nidan verb (upper class) with 'tsu' ending (archaic) | +| `v2a_s` | 
Nidan verb with 'u' ending (archaic) | +| `v2w_s` | Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic) | +| `v2y_s` | Nidan verb (lower class) with 'yu' ending (archaic) | +| `v2y_k` | Nidan verb (upper class) with 'yu' ending (archaic) | +| `v2z_s` | Nidan verb (lower class) with 'zu' ending (archaic) | +| `vn` | irregular nu verb | +| `vr` | irregular ru verb, plain form ends with -ri | +| `vs_c` | su verb - precursor to the modern suru | +| `vs` | noun or participle which takes the aux. verb suru | +| `vs_i` | suru verb - included | +| `vs_s` | suru verb - special class | +| `v_unspec` | verb unspecified | +| `v4b` | Yodan verb with 'bu' ending (archaic) | +| `v4g` | Yodan verb with 'gu' ending (archaic) | +| `v4h` | Yodan verb with 'hu/fu' ending (archaic) | +| `v4k` | Yodan verb with 'ku' ending (archaic) | +| `v4m` | Yodan verb with 'mu' ending (archaic) | +| `v4n` | Yodan verb with 'nu' ending (archaic) | +| `v4r` | Yodan verb with 'ru' ending (archaic) | +| `v4s` | Yodan verb with 'su' ending (archaic) | +| `v4t` | Yodan verb with 'tsu' ending (archaic) | + diff --git a/docs/tsconfig.json b/docs/tsconfig.json new file mode 100644 index 00000000..bcbf8b50 --- /dev/null +++ b/docs/tsconfig.json @@ -0,0 +1,3 @@ +{ + "extends": "astro/tsconfigs/strict" +} diff --git a/lib/src/core/compile.rs b/lib/src/core/compile.rs index 0f2c8c34..d7f92a31 100644 --- a/lib/src/core/compile.rs +++ b/lib/src/core/compile.rs @@ -1,3 +1,45 @@ +//! Dictionary compilation and binary serialization. +//! +//! This module provides functionality to compile dictionary data structures into +//! the ODict binary format. The compilation process involves serialization, +//! compression, and packaging with metadata headers. +//! +//! # Binary Format Structure +//! +//! The ODict binary format consists of: +//! 1. **Signature** (5 bytes): "ODICT" magic bytes +//! 2. **Version Length** (8 bytes): Length of version string +//! 3. 
**Version** (variable): Semantic version string
+//! 4. **Content Length** (8 bytes): Length of compressed content
+//! 5. **Content** (variable): Compressed serialized dictionary data
+//!
+//! # Examples
+//!
+//! ## Basic Compilation
+//!
+//! ```rust
+//! use odict::{Dictionary, CompilerOptions};
+//!
+//! let dict = Dictionary::from_path("dictionary.xml")?;
+//! let compiled = dict.build()?;
+//! let bytes = compiled.to_bytes()?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! ## Compilation with Custom Compression
+//!
+//! ```rust
+//! use odict::{Dictionary, CompilerOptions, CompressOptions};
+//!
+//! let dict = Dictionary::from_path("dictionary.xml")?;
+//! let compiled = dict.build()?;
+//!
+//! let options = CompilerOptions::default()
+//!     .with_compression(CompressOptions::default());
+//! let bytes = compiled.to_bytes_with_options(options)?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+
 use crate::compress::{compress, CompressOptions};
 use crate::error::Error;
 use crate::schema::Dictionary;
@@ -5,8 +47,13 @@ use crate::OpenDictionary;

 use super::consts::{SIGNATURE, VERSION};

+/// Configuration options for dictionary compilation.
+///
+/// This struct allows customization of the compilation process, particularly
+/// compression settings that affect the final binary size and performance.
 #[derive(Default)]
 pub struct CompilerOptions {
+    /// Compression options to use during compilation.
     pub compress_options: CompressOptions,
 }

@@ -17,6 +64,20 @@ impl AsRef<CompilerOptions> for CompilerOptions {
 }

 impl CompilerOptions {
+    /// Set custom compression options for the compilation process.
+    ///
+    /// # Arguments
+    ///
+    /// * `compress_options` - The compression configuration to use
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use odict::{CompilerOptions, CompressOptions};
+    ///
+    /// let options = CompilerOptions::default()
+    ///     .with_compression(CompressOptions::default());
+    /// ```
     pub fn with_compression(mut self, compress_options: CompressOptions) -> Self {
         self.compress_options = compress_options;
         self
@@ -24,10 +85,70 @@
     }
 }

 impl OpenDictionary {
+    /// Convert the dictionary to binary format using default compilation options.
+    ///
+    /// This method serializes the dictionary into the ODict binary format,
+    /// applying default compression and packaging it with the appropriate headers.
+    ///
+    /// # Returns
+    ///
+    /// A `Vec<u8>` containing the complete binary representation of the dictionary.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - Compression fails
+    /// - Serialization fails
+    /// - Binary format validation fails
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use odict::{Dictionary, OpenDictionary};
+    ///
+    /// let dict = Dictionary::from_path("dictionary.xml")?;
+    /// let compiled = dict.build()?;
+    /// let bytes = compiled.to_bytes()?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     pub fn to_bytes(&self) -> crate::Result<Vec<u8>> {
         self.to_bytes_with_options(CompilerOptions::default())
     }

+    /// Convert the dictionary to binary format with custom compilation options.
+    ///
+    /// This method provides fine-grained control over the compilation process,
+    /// allowing customization of compression settings and other options.
+    ///
+    /// # Arguments
+    ///
+    /// * `options` - Compilation options to customize the process
+    ///
+    /// # Returns
+    ///
+    /// A `Vec<u8>` containing the complete binary representation of the dictionary.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - Compression fails with the specified options
+    /// - Serialization fails
+    /// - Binary format validation fails
+    /// - Header construction fails
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use odict::{Dictionary, OpenDictionary, CompilerOptions, CompressOptions};
+    ///
+    /// let dict = Dictionary::from_path("dictionary.xml")?;
+    /// let compiled = dict.build()?;
+    ///
+    /// let options = CompilerOptions::default()
+    ///     .with_compression(CompressOptions::default());
+    /// let bytes = compiled.to_bytes_with_options(options)?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     pub fn to_bytes_with_options<Options: AsRef<CompilerOptions>>(
         &self,
         options: Options,
@@ -86,6 +207,35 @@
 }

 impl Dictionary {
+    /// Build a compiled dictionary from the current dictionary data.
+    ///
+    /// This method transforms a [`Dictionary`] into an [`OpenDictionary`] by
+    /// serializing the dictionary data and preparing it for binary compilation.
+    /// The resulting [`OpenDictionary`] can then be converted to bytes or saved to disk.
+    ///
+    /// # Returns
+    ///
+    /// An [`OpenDictionary`] containing the serialized dictionary data with
+    /// appropriate metadata (signature, version, etc.).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - Dictionary serialization fails
+    /// - Memory allocation fails
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use odict::Dictionary;
+    ///
+    /// let dict = Dictionary::from_path("dictionary.xml")?;
+    /// let compiled = dict.build()?;
+    ///
+    /// // Now you can save to disk or convert to bytes
+    /// compiled.to_disk("output.odict")?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
     pub fn build(&self) -> crate::Result<OpenDictionary> {
         let dict = OpenDictionary {
             signature: String::from_utf8_lossy(SIGNATURE).to_string(),
diff --git a/lib/src/core/consts.rs b/lib/src/core/consts.rs
index 9a100a2b..22f9b6f7 100644
--- a/lib/src/core/consts.rs
+++ b/lib/src/core/consts.rs
@@ -1,8 +1,78 @@
+//! 
Core constants for the ODict binary format. +//! +//! This module defines the fundamental constants used throughout the ODict +//! library for binary format identification, versioning, and compatibility +//! checking. +//! +//! # Overview +//! +//! The constants defined here are used for: +//! - Binary format identification through magic signatures +//! - Version tracking and compatibility verification +//! - Ensuring consistent format standards across the library +//! +//! # Binary Format Identification +//! +//! The [`SIGNATURE`] constant provides the magic bytes that identify ODict +//! binary files. This signature is written at the beginning of every compiled +//! dictionary file and verified during reading operations. +//! +//! # Version Management +//! +//! The [`VERSION`] constant contains the current library version, automatically +//! derived from the Cargo package version. This is used for compatibility +//! checking when reading dictionary files created with different library versions. + use std::sync::LazyLock; use crate::version::SemanticVersion; +/// Magic signature bytes for ODict binary format identification. +/// +/// This 5-byte signature ("ODICT") is written at the beginning of every +/// compiled dictionary file to identify it as a valid ODict binary format. +/// The signature is checked during file reading to ensure format validity. +/// +/// # Format +/// +/// The signature consists of the ASCII bytes for "ODICT": +/// - `O` (0x4F) +/// - `D` (0x44) +/// - `I` (0x49) +/// - `C` (0x43) +/// - `T` (0x54) +/// +/// # Usage +/// +/// This constant is used internally by the reading and writing operations +/// and should not typically be used directly by library consumers. pub const SIGNATURE: &[u8] = b"ODICT"; +/// Current library version for compatibility checking. +/// +/// This constant contains the semantic version of the current library, +/// automatically derived from the Cargo package version at compile time. 
+/// It's used to ensure compatibility between dictionary files and the
+/// library version attempting to read them.
+///
+/// # Compatibility Rules
+///
+/// Dictionary files are considered compatible if they have:
+/// - The same major version number as the library
+/// - The same prerelease status (stable vs. prerelease)
+///
+/// # Lazy Initialization
+///
+/// The version is lazily initialized from the `CARGO_PKG_VERSION` environment
+/// variable, which is automatically set by Cargo during compilation. This
+/// ensures the version always matches the actual package version.
+///
+/// # Examples
+///
+/// ```rust
+/// use odict::core::consts::VERSION;
+///
+/// println!("Library version: {}", *VERSION);
+/// ```
 pub const VERSION: LazyLock<SemanticVersion> =
     LazyLock::new(|| SemanticVersion::from(env!("CARGO_PKG_VERSION")));
diff --git a/lib/src/core/lexicon.rs b/lib/src/core/lexicon.rs
index a85bb467..9540de77 100644
--- a/lib/src/core/lexicon.rs
+++ b/lib/src/core/lexicon.rs
@@ -1,8 +1,83 @@
+//! Lexicon extraction operations for ODict dictionaries.
+//!
+//! This module provides functionality to extract sorted lists of terms (lexicons)
+//! from dictionaries. A lexicon represents all the headwords/terms available
+//! in a dictionary, sorted alphabetically.
+//!
+//! # Overview
+//!
+//! The lexicon functionality allows you to:
+//! - Extract all terms from a dictionary as a sorted list
+//! - Get a quick overview of dictionary contents
+//! - Generate word lists for analysis or display
+//!
+//! # Examples
+//!
+//! ## Extracting a Lexicon from a Dictionary
+//!
+//! ```rust
+//! use odict::Dictionary;
+//!
+//! let dict = Dictionary::from_path("dictionary.xml")?;
+//! let terms = dict.lexicon();
+//!
+//! // Print all terms in alphabetical order
+//! for term in terms {
+//!     println!("{}", term);
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! ## Working with Archived Dictionaries
+//!
+//! ```rust
+//! use odict::OpenDictionary;
+//!
+//! 
let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! let terms = archived.lexicon(); +//! +//! println!("Dictionary contains {} terms", terms.len()); +//! # Ok::<(), Box>(()) +//! ``` + use crate::schema::{ArchivedDictionary, Dictionary}; macro_rules! lexicon { ($t:ident) => { impl $t { + /// Extract a sorted lexicon (list of terms) from the dictionary. + /// + /// This method collects all entry terms from the dictionary and returns + /// them as a sorted vector of string references. The terms are sorted + /// alphabetically using standard string ordering. + /// + /// # Returns + /// + /// A `Vec<&str>` containing all dictionary terms in alphabetical order. + /// Each term appears exactly once, even if there are multiple entries + /// with the same term. + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let dict = Dictionary::from_path("dictionary.xml")?; + /// let lexicon = dict.lexicon(); + /// + /// // Print first 10 terms + /// for term in lexicon.iter().take(10) { + /// println!("{}", term); + /// } + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Performance + /// + /// This operation has O(n log n) complexity due to sorting, where n is + /// the number of entries in the dictionary. The terms are collected + /// first, then sorted in-place. pub fn lexicon(&self) -> Vec<&str> { let mut vec: Vec<&str> = self .entries diff --git a/lib/src/core/lookup.rs b/lib/src/core/lookup.rs index db23ab44..129feb9e 100644 --- a/lib/src/core/lookup.rs +++ b/lib/src/core/lookup.rs @@ -1,20 +1,216 @@ +//! Advanced dictionary lookup operations for ODict. +//! +//! This module provides sophisticated search capabilities over dictionaries with +//! configurable matching strategies, redirect following via see_also links, and +//! case-insensitive fallback options. It supports both single and batch lookups +//! with parallel processing for optimal performance. +//! +//! # Overview +//! +//! 
The lookup system offers multiple layers of functionality: +//! +//! ## Matching Strategies +//! - **Exact matching**: Direct term-to-entry mapping +//! - **Split strategy**: Progressive substring matching for compound terms +//! +//! ## Advanced Features +//! - **Redirect following**: Automatic traversal of see_also links with cycle protection +//! - **Case-insensitive fallback**: Automatic retry with lowercase when exact match fails +//! - **Parallel processing**: Concurrent lookup of multiple queries for performance +//! - **Configurable limits**: Control redirect depth and matching behavior +//! +//! ## Performance Characteristics +//! - Single lookups: O(1) average case for exact matches +//! - Split strategy: O(n²) worst case where n is query length +//! - Parallel lookups: Scales with available CPU cores +//! - Memory efficient: Zero-copy results with lifetime management +//! +//! # Examples +//! +//! ## Basic Exact Lookup +//! +//! ```rust +//! use odict::{OpenDictionary, LookupOptions}; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! let queries = vec!["hello"]; +//! let results = archived.lookup(&queries, LookupOptions::default())?; +//! +//! for result in results { +//! println!("Found: {}", result.entry.term.as_str()); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Advanced Lookup with Options +//! +//! ```rust +//! use odict::{OpenDictionary, LookupOptions, LookupStrategy}; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! let options = LookupOptions::default() +//! .insensitive(true) // Enable case-insensitive fallback +//! .follow(3) // Follow up to 3 redirects +//! .strategy(LookupStrategy::Split(2)); // Split to minimum 2 chars +//! +//! let queries = vec!["Hello", "compound-word"]; +//! let results = archived.lookup(&queries, options)?; +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Handling Redirects +//! 
+//! ```rust +//! use odict::{OpenDictionary, LookupOptions}; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! let options = LookupOptions::default().follow(5); +//! let queries = vec!["abbreviation"]; // Might redirect to full form +//! let results = archived.lookup(&queries, options)?; +//! +//! for result in results { +//! if let Some(redirect_from) = result.directed_from { +//! println!("'{}' redirected from '{}'", +//! result.entry.term.as_str(), +//! redirect_from.term.as_str()); +//! } +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Split Strategy for Compound Terms +//! +//! ```rust +//! use odict::{OpenDictionary, LookupOptions, LookupStrategy}; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! // This will try "compound-word", then "compound", then "word" +//! let options = LookupOptions::default() +//! .strategy(LookupStrategy::Split(3)); // Minimum 3 characters +//! +//! let queries = vec!["compound-word"]; +//! let results = archived.lookup(&queries, options)?; +//! # Ok::<(), Box>(()) +//! ``` use crate::schema::{ArchivedDictionary, ArchivedEntry, Dictionary, Entry}; use rayon::prelude::*; use rkyv::option::ArchivedOption; use std::marker::{Send, Sync}; +/// Strategy for matching query terms against dictionary entries. +/// +/// This enum defines the different approaches available for finding matches +/// when performing dictionary lookups. Each strategy has different performance +/// characteristics and use cases. #[derive(Debug, PartialEq, Clone)] pub enum LookupStrategy { + /// Match queries exactly against entry terms. + /// + /// This is the fastest strategy, performing direct hash map lookups. + /// It requires the query to exactly match an entry term (case-sensitive + /// unless the `insensitive` option is enabled). 
+ /// + /// **Performance**: O(1) average case + /// **Use case**: When you know the exact term you're looking for + /// + /// # Examples + /// + /// ```rust + /// use odict::{LookupStrategy, LookupOptions}; + /// + /// let options = LookupOptions::default() + /// .strategy(LookupStrategy::Exact); + /// ``` Exact, + + /// Split the query into progressively smaller substrings down to `min_length`, + /// attempting to match each substring from left to right. + /// + /// This strategy is useful for compound words or when you want to find + /// partial matches. It starts with the full query and progressively + /// shortens it from the right until a match is found or the minimum + /// length is reached. + /// + /// **Performance**: O(n²) worst case where n is query length + /// **Use case**: Compound words, partial matching, morphological analysis + /// + /// # Algorithm + /// + /// For a query "compound-word" with min_length=3: + /// 1. Try "compound-word" (full query) + /// 2. Try "compound-wor", "compound-wo", etc. + /// 3. Try "compound" (if found, move to next segment) + /// 4. Try "word", "wor" (down to min_length) + /// + /// # Examples + /// + /// ```rust + /// use odict::{LookupStrategy, LookupOptions}; + /// + /// // Split down to minimum 3 characters + /// let options = LookupOptions::default() + /// .strategy(LookupStrategy::Split(3)); + /// ``` Split(usize), } +/// Configuration options for dictionary lookup operations. +/// +/// This struct provides fine-grained control over lookup behavior, including +/// redirect following, matching strategies, and case sensitivity. All options +/// have sensible defaults for common use cases. 
+/// +/// # Default Behavior +/// +/// - **No redirect following**: Prevents infinite loops and improves performance +/// - **Exact matching**: Most predictable and fastest lookup strategy +/// - **Case-sensitive search**: Preserves linguistic distinctions +/// +/// # Examples +/// +/// ## Basic Usage +/// +/// ```rust +/// use odict::LookupOptions; +/// +/// // Use all defaults +/// let options = LookupOptions::default(); +/// ``` +/// +/// ## Custom Configuration +/// +/// ```rust +/// use odict::{LookupOptions, LookupStrategy}; +/// +/// let options = LookupOptions::default() +/// .follow(5) // Follow up to 5 redirects +/// .insensitive(true) // Enable case-insensitive fallback +/// .strategy(LookupStrategy::Split(2)); // Split strategy with min length 2 +/// ``` #[derive(Debug, Clone)] pub struct LookupOptions { /// Whether to follow see_also links until finding an entry with etymologies. pub follow: bool, pub strategy: LookupStrategy, + + /// Whether to fall back to case-insensitive search if exact match fails. + /// + /// When enabled, if an exact (case-sensitive) match fails, the system + /// will automatically retry with a lowercase version of the query. + /// This is useful for handling user input that may have incorrect + /// capitalization. + /// + /// **Note**: The fallback only occurs if the lowercase version differs + /// from the original query, preventing unnecessary duplicate lookups. pub insensitive: bool, } @@ -25,6 +221,27 @@ impl AsRef for LookupOptions { } impl LookupOptions { + /// Construct default lookup options with safe, predictable settings. + /// + /// The default configuration prioritizes safety and performance: + /// - **No redirect following**: Prevents infinite loops and improves performance + /// - **Exact matching strategy**: Most predictable and fastest lookup method + /// - **Case-sensitive search**: Preserves linguistic distinctions + /// + /// # Returns + /// + /// A new `LookupOptions` instance with default settings. 
+ /// + /// # Examples + /// + /// ```rust + /// use odict::LookupOptions; + /// + /// let options = LookupOptions::default(); + /// assert_eq!(options.follow, None); + /// assert_eq!(options.strategy, odict::LookupStrategy::Exact); + /// assert_eq!(options.insensitive, false); + /// ``` pub fn default() -> Self { Self { follow: false, @@ -38,20 +255,140 @@ impl LookupOptions { self } + /// Set the matching strategy for query processing. + /// + /// The strategy determines how queries are matched against dictionary entries. + /// Different strategies have different performance characteristics and use cases. + /// + /// # Arguments + /// + /// * `strategy` - The [`LookupStrategy`] to use for matching + /// + /// # Examples + /// + /// ```rust + /// use odict::{LookupOptions, LookupStrategy}; + /// + /// // Use exact matching (fastest) + /// let exact = LookupOptions::default() + /// .strategy(LookupStrategy::Exact); + /// + /// // Use split strategy for compound words + /// let split = LookupOptions::default() + /// .strategy(LookupStrategy::Split(3)); + /// ``` pub fn strategy(mut self, strategy: LookupStrategy) -> Self { self.strategy = strategy; self } + /// Enable or disable case-insensitive fallback matching. + /// + /// When enabled, if an exact (case-sensitive) match fails, the system + /// automatically retries with a lowercase version of the query. This is + /// useful for handling user input with incorrect capitalization. 
+ /// + /// # Arguments + /// + /// * `insensitive` - Whether to enable case-insensitive fallback + /// + /// # Performance Impact + /// + /// - Minimal impact when exact matches succeed + /// - Adds one additional lookup when exact match fails and query contains uppercase + /// - No additional lookup if the query is already lowercase + /// + /// # Examples + /// + /// ```rust + /// use odict::LookupOptions; + /// + /// // Enable case-insensitive fallback + /// let options = LookupOptions::default().insensitive(true); + /// + /// // This will try "Hello" first, then "hello" if not found + /// // let results = dict.lookup(&["Hello"], options)?; + /// ``` pub fn insensitive(mut self, insensitive: bool) -> Self { self.insensitive = insensitive; self } } +/// Result of a dictionary lookup operation. +/// +/// This struct encapsulates the result of a successful lookup, including +/// the matched entry and optional redirect information. It provides context +/// about how the match was found, which is useful for understanding the +/// lookup path and handling redirects. 
+/// +/// # Generic Parameter +/// +/// * `E` - The entry type (either `&Entry` or `&ArchivedEntry`) +/// +/// # Examples +/// +/// ## Basic Usage +/// +/// ```rust +/// use odict::{OpenDictionary, LookupOptions}; +/// +/// let dict = OpenDictionary::from_path("dictionary.odict")?; +/// let archived = dict.contents()?; +/// let queries = vec!["hello"]; +/// let results = archived.lookup(&queries, LookupOptions::default())?; +/// +/// for result in results { +/// println!("Found: {}", result.entry.term.as_str()); +/// +/// if let Some(redirect_from) = result.directed_from { +/// println!(" (redirected from: {})", redirect_from.term.as_str()); +/// } +/// } +/// # Ok::<(), Box>(()) +/// ``` +/// +/// ## Checking for Redirects +/// +/// ```rust +/// use odict::{OpenDictionary, LookupOptions}; +/// +/// # fn example(results: Vec>) { +/// for result in results { +/// match result.directed_from { +/// Some(original) => { +/// println!("'{}' is an alias for '{}'", +/// original.term.as_str(), +/// result.entry.term.as_str()); +/// } +/// None => { +/// println!("Direct match: {}", result.entry.term.as_str()); +/// } +/// } +/// } +/// # } +/// ``` #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct LookupResult { + /// The matched dictionary entry. + /// + /// This is the final entry that was found, either through direct matching + /// or by following redirects. It contains all the linguistic data + /// (definitions, etymologies, pronunciations, etc.) for the term. pub entry: E, + + /// The entry that originally directed to this match via see_also links. + /// + /// This field is `Some(entry)` when the result was found by following + /// a redirect chain, containing the entry that started the redirect. + /// It's `None` for direct matches without any redirects. 
+ /// + /// # Use Cases + /// + /// - Displaying "redirected from" information to users + /// - Understanding alias relationships in the dictionary + /// - Debugging lookup paths and redirect chains + /// - Analytics on which redirects are commonly followed pub directed_from: Option, } @@ -62,6 +399,12 @@ pub struct LookupResult { macro_rules! lookup { ($tys:ident, $ret:ident, $opt:ident) => { impl $tys { + #[doc = r#"Attempt to find a single entry by term. + +This helper supports optional redirect following and an optional +case-insensitive retry (lowercasing the query) when configured. + +Returns Some(LookupResult) on a match, or None if not found."#] fn find_entry<'a>( &'a self, follow: &bool, @@ -132,6 +475,9 @@ macro_rules! lookup { Ok($opt::None) } + #[doc = r#"Perform lookup for a single query using the provided options. + +Depending on the strategy, this may return zero or more results."#] fn perform_lookup<'a, Options>( &'a self, query: &str, @@ -196,6 +542,26 @@ macro_rules! lookup { Ok(results) } + #[doc = r#"Lookup multiple queries in parallel. + +Each query is processed independently with the provided options. + +Returns all matches without a guaranteed order. + +Examples +-------- +```rust +use odict::{OpenDictionary, LookupOptions, LookupStrategy}; +# fn demo(dict: &odict::OpenDictionary) -> odict::Result<()> { +let archived = dict.contents()?; +let queries = vec!["hello", "world"]; +let options = LookupOptions::default() + .insensitive(true) + .strategy(LookupStrategy::Exact); +let results = archived.lookup(&queries, options)?; +# Ok(()) +# } +```"#] pub fn lookup<'a, 'b, Query, Options>( &'a self, queries: &'b Vec, diff --git a/lib/src/core/merge.rs b/lib/src/core/merge.rs index 9944cbc8..bb65f115 100644 --- a/lib/src/core/merge.rs +++ b/lib/src/core/merge.rs @@ -1,12 +1,104 @@ +//! Dictionary merging operations for ODict. +//! +//! This module provides functionality to combine multiple dictionaries into a single +//! 
dictionary, preserving unique entries and avoiding duplicates. +//! +//! # Overview +//! +//! The merge operations allow you to: +//! - Merge a single dictionary into another +//! - Merge multiple dictionaries at once +//! - Preserve unique entries (no duplicates) +//! +//! # Examples +//! +//! ## Merging Two Dictionaries +//! +//! ```rust +//! use odict::Dictionary; +//! +//! let mut dict1 = Dictionary::from_path("dict1.xml")?; +//! let dict2 = Dictionary::from_path("dict2.xml")?; +//! +//! // Merge dict2 into dict1 +//! dict1.merge(&dict2); +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Merging Multiple Dictionaries +//! +//! ```rust +//! use odict::Dictionary; +//! +//! let mut main_dict = Dictionary::from_path("main.xml")?; +//! let dict2 = Dictionary::from_path("dict2.xml")?; +//! let dict3 = Dictionary::from_path("dict3.xml")?; +//! +//! // Merge multiple dictionaries at once +//! main_dict.merge_multi(vec![&dict2, &dict3]); +//! # Ok::<(), Box>(()) +//! ``` + use crate::schema::Dictionary; impl Dictionary { + /// Merge multiple dictionaries into this dictionary. + /// + /// This is a convenience method that calls [`merge`](Dictionary::merge) for each + /// dictionary in the provided vector. Entries are processed in order, and + /// duplicates are automatically filtered out. + /// + /// # Arguments + /// + /// * `dictionaries` - A vector of dictionary references to merge + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let mut main_dict = Dictionary::from_path("main.xml")?; + /// let dict2 = Dictionary::from_path("dict2.xml")?; + /// let dict3 = Dictionary::from_path("dict3.xml")?; + /// + /// main_dict.merge_multi(vec![&dict2, &dict3]); + /// # Ok::<(), Box>(()) + /// ``` pub fn merge_multi(&mut self, dictionaries: Vec<&Dictionary>) { for src in dictionaries { self.merge(src); } } + /// Merge another dictionary into this dictionary. 
+ /// + /// This method adds all entries from the source dictionary that are not + /// already present in this dictionary. Duplicate entries (based on the + /// entry's equality implementation) are automatically filtered out. + /// + /// # Arguments + /// + /// * `dictionary` - The source dictionary to merge from + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let mut dict1 = Dictionary::from_path("dict1.xml")?; + /// let dict2 = Dictionary::from_path("dict2.xml")?; + /// + /// // Merge dict2 into dict1 + /// dict1.merge(&dict2); + /// + /// // dict1 now contains all unique entries from both dictionaries + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Performance + /// + /// The merge operation has O(n) complexity where n is the number of entries + /// in the source dictionary. Each entry is checked for existence before insertion. pub fn merge(&mut self, dictionary: &Dictionary) { for entry in dictionary.entries.iter() { if !self.entries.contains(entry) { diff --git a/lib/src/core/mod.rs b/lib/src/core/mod.rs index 976addb3..a428c4e9 100644 --- a/lib/src/core/mod.rs +++ b/lib/src/core/mod.rs @@ -1,3 +1,62 @@ +//! Core functionality for the ODict dictionary format. +//! +//! This module provides the fundamental operations for working with ODict dictionaries, +//! including compilation, reading, writing, lookup, and various utility functions. +//! +//! # Overview +//! +//! The core module is organized into several key areas: +//! +//! - **Compilation & Serialization**: [`compile`] - Convert dictionaries to binary format +//! - **Reading & Deserialization**: [`read`] - Load dictionaries from various sources +//! - **Writing**: [`write`] - Save dictionaries to disk +//! - **Lookup Operations**: [`lookup`] - Search and retrieve dictionary entries +//! - **Dictionary Management**: [`merge`], [`lexicon`] - Combine dictionaries and extract terms +//! 
- **Utilities**: [`preview`], [`rank`], [`resolve`] - Additional dictionary operations +//! - **Version Management**: [`version`] - Semantic versioning support +//! +//! # Examples +//! +//! ## Basic Dictionary Operations +//! +//! ```rust +//! use odict::{Dictionary, OpenDictionary}; +//! +//! // Load a dictionary from XML +//! let dict = Dictionary::from_path("dictionary.xml")?; +//! +//! // Compile to binary format +//! let compiled = dict.build()?; +//! +//! // Save to disk +//! compiled.to_disk("dictionary.odict")?; +//! +//! // Load from binary +//! let loaded = OpenDictionary::from_path("dictionary.odict")?; +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Dictionary Lookup +//! +//! ```rust +//! use odict::{OpenDictionary, LookupOptions, LookupStrategy}; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! // Simple lookup +//! let queries = vec!["hello"]; +//! let results = archived.lookup(&queries, LookupOptions::default())?; +//! +//! // Advanced lookup with options +//! let options = LookupOptions::default() +//! .insensitive(true) +//! .follow(5) +//! .strategy(LookupStrategy::Split(2)); +//! let results = archived.lookup(&queries, options)?; +//! # Ok::<(), Box>(()) +//! ``` + mod consts; pub mod compile; diff --git a/lib/src/core/preview.rs b/lib/src/core/preview.rs index c498ec9c..9d29d275 100644 --- a/lib/src/core/preview.rs +++ b/lib/src/core/preview.rs @@ -1,12 +1,63 @@ +//! Entry preview generation for ODict dictionaries. +//! +//! This module provides functionality to generate concise text previews of dictionary +//! entries by extracting and concatenating their definitions. Previews are useful for +//! displaying quick summaries of entries without showing the full structured data. +//! +//! # Overview +//! +//! The preview functionality allows you to: +//! - Generate text summaries of dictionary entries +//! - Customize the delimiter used to separate definitions +//! 
- Handle both regular and grouped definitions +//! - Optionally convert markdown to plain text (when markdown feature is enabled) +//! +//! # Examples +//! +//! ## Basic Preview Generation +//! +//! ```rust +//! use odict::{Dictionary, PreviewOptions}; +//! +//! let dict = Dictionary::from_path("dictionary.xml")?; +//! if let Some(entry) = dict.entries.iter().next() { +//! let preview = entry.preview(PreviewOptions::default()); +//! println!("Preview: {}", preview); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Custom Delimiter +//! +//! ```rust +//! use odict::{Dictionary, PreviewOptions}; +//! +//! let dict = Dictionary::from_path("dictionary.xml")?; +//! if let Some(entry) = dict.entries.iter().next() { +//! let options = PreviewOptions::default().delimiter(" | ".to_string()); +//! let preview = entry.preview(options); +//! println!("Preview: {}", preview); +//! } +//! # Ok::<(), Box>(()) +//! ``` + #[cfg(feature = "markdown")] use crate::md::to_text; use crate::schema::{ArchivedDefinitionType, ArchivedEntry, DefinitionType, Entry}; +/// Configuration options for generating entry previews. +/// +/// This struct allows customization of how definitions are joined together +/// when creating a preview string from a dictionary entry. pub struct PreviewOptions { delimiter: String, } impl Default for PreviewOptions { + /// Create default preview options. + /// + /// The default delimiter is `"; "` (semicolon followed by space), which + /// provides a natural separation between multiple definitions. fn default() -> Self { Self { delimiter: "; ".to_string(), @@ -15,12 +66,30 @@ impl Default for PreviewOptions { } impl PreviewOptions { + /// Set a custom delimiter for joining definitions. 
+ /// + /// # Arguments + /// + /// * `delimiter` - The string to use for separating definitions in the preview + /// + /// # Examples + /// + /// ```rust + /// use odict::PreviewOptions; + /// + /// let options = PreviewOptions::default() + /// .delimiter(" | ".to_string()); + /// ``` pub fn delimiter(mut self, delimiter: String) -> Self { self.delimiter = delimiter; self } } +/// Convert text content to plain text. +/// +/// When the markdown feature is disabled, this function returns the input unchanged. +/// When the markdown feature is enabled, it converts markdown to plain text. #[cfg(not(feature = "markdown"))] fn to_text(value: &str) -> &str { value @@ -29,6 +98,48 @@ fn to_text(value: &str) -> &str { macro_rules! preview { ($t:ident, $d:ident) => { impl $t { + /// Generate a text preview of this dictionary entry. + /// + /// This method extracts all definitions from the entry's etymologies and senses, + /// converts them to plain text (if markdown feature is enabled), and joins them + /// using the specified delimiter. + /// + /// # Arguments + /// + /// * `options` - Configuration for preview generation + /// + /// # Returns + /// + /// A `String` containing all definitions joined by the specified delimiter. + /// If the entry has no definitions, returns an empty string. + /// + /// # Examples + /// + /// ```rust + /// use odict::{Dictionary, PreviewOptions}; + /// + /// let dict = Dictionary::from_path("dictionary.xml")?; + /// if let Some(entry) = dict.entries.iter().next() { + /// // Use default options ("; " delimiter) + /// let preview = entry.preview(PreviewOptions::default()); + /// + /// // Use custom delimiter + /// let custom_preview = entry.preview( + /// PreviewOptions::default().delimiter(" | ".to_string()) + /// ); + /// } + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Processing Order + /// + /// Definitions are processed in this order: + /// 1. Iterate through etymologies + /// 2. For each etymology, iterate through senses + /// 3. 
For each sense, iterate through definitions + /// 4. Handle both individual definitions and definition groups + /// 5. Convert markdown to text (if feature enabled) + /// 6. Join all definitions with the specified delimiter pub fn preview(&self, options: PreviewOptions) -> String { let definitions: Vec = self .etymologies diff --git a/lib/src/core/rank.rs b/lib/src/core/rank.rs index 75d92139..095e2e0a 100644 --- a/lib/src/core/rank.rs +++ b/lib/src/core/rank.rs @@ -1,6 +1,65 @@ +//! Entry ranking operations for ODict dictionaries. +//! +//! This module provides functionality to analyze and extract ranking information +//! from dictionary entries. Rankings are typically used to indicate word frequency, +//! importance, or usage patterns within a dictionary. +//! +//! # Overview +//! +//! The ranking functionality allows you to: +//! - Find the minimum rank across all entries +//! - Find the maximum rank across all entries +//! - Analyze ranking distribution in dictionaries +//! +//! # Ranking System +//! +//! Rankings are optional numeric values associated with dictionary entries. +//! Lower numbers typically indicate higher frequency or importance (e.g., rank 1 +//! might be the most common word). Not all entries are required to have ranks. +//! +//! # Examples +//! +//! ## Finding Rank Range +//! +//! ```rust +//! use odict::Dictionary; +//! +//! let dict = Dictionary::from_path("dictionary.xml")?; +//! +//! if let Some(min) = dict.min_rank() { +//! println!("Highest priority rank: {}", min); +//! } +//! +//! if let Some(max) = dict.max_rank() { +//! println!("Lowest priority rank: {}", max); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Working with Archived Dictionaries +//! +//! ```rust +//! use odict::OpenDictionary; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! match (archived.min_rank(), archived.max_rank()) { +//! (Some(min), Some(max)) => { +//! 
println!("Rank range: {} to {}", min, max); +//! } +//! _ => println!("No ranked entries found"), +//! } +//! # Ok::<(), Box>(()) +//! ``` + use crate::schema::{ArchivedDictionary, Dictionary}; impl ArchivedDictionary { + /// Create an iterator over all rank values in the archived dictionary. + /// + /// This internal method filters entries to only those with rank values, + /// converting archived rank values to native u32 format. fn rank_iter(&self) -> impl Iterator + '_ { self.entries .iter() @@ -9,6 +68,9 @@ impl ArchivedDictionary { } impl Dictionary { + /// Create an iterator over all rank values in the dictionary. + /// + /// This internal method filters entries to only those with rank values. fn rank_iter(&self) -> impl Iterator + '_ { self.entries.iter().filter_map(|entry| entry.rank) } @@ -17,10 +79,68 @@ impl Dictionary { macro_rules! rank { ($t:ident) => { impl $t { + /// Find the minimum rank value across all entries in the dictionary. + /// + /// This method searches through all entries that have rank values and + /// returns the smallest rank number. Since lower ranks typically indicate + /// higher importance or frequency, this represents the "highest priority" entry. + /// + /// # Returns + /// + /// - `Some(u32)` - The minimum rank value if any entries have ranks + /// - `None` - If no entries in the dictionary have rank values + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let dict = Dictionary::from_path("dictionary.xml")?; + /// + /// match dict.min_rank() { + /// Some(min_rank) => println!("Most important entry has rank: {}", min_rank), + /// None => println!("No entries have rank information"), + /// } + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Performance + /// + /// This operation has O(n) complexity where n is the number of entries + /// in the dictionary, as it must examine all entries to find the minimum. 
pub fn min_rank(&self) -> Option { self.rank_iter().min() } + /// Find the maximum rank value across all entries in the dictionary. + /// + /// This method searches through all entries that have rank values and + /// returns the largest rank number. Since higher ranks typically indicate + /// lower importance or frequency, this represents the "lowest priority" entry. + /// + /// # Returns + /// + /// - `Some(u32)` - The maximum rank value if any entries have ranks + /// - `None` - If no entries in the dictionary have rank values + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let dict = Dictionary::from_path("dictionary.xml")?; + /// + /// match dict.max_rank() { + /// Some(max_rank) => println!("Least important entry has rank: {}", max_rank), + /// None => println!("No entries have rank information"), + /// } + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Performance + /// + /// This operation has O(n) complexity where n is the number of entries + /// in the dictionary, as it must examine all entries to find the maximum. pub fn max_rank(&self) -> Option { self.rank_iter().max() } diff --git a/lib/src/core/read.rs b/lib/src/core/read.rs index c6036ffc..66e31640 100644 --- a/lib/src/core/read.rs +++ b/lib/src/core/read.rs @@ -1,3 +1,60 @@ +//! Dictionary reading and deserialization operations for ODict. +//! +//! This module provides functionality to read and deserialize dictionaries from +//! various sources, including XML files and binary ODict format files. It handles +//! format validation, version compatibility checking, and decompression. +//! +//! # Overview +//! +//! The reading functionality supports: +//! - Loading dictionaries from XML files +//! - Loading compiled dictionaries from binary ODict files +//! - Reading from file paths or byte arrays +//! - Automatic format detection and validation +//! - Version compatibility verification +//! - Decompression of binary content +//! +//! # Binary Format Structure +//! 
+//! The ODict binary format consists of: +//! 1. **Signature** (5 bytes): "ODICT" magic bytes for format identification +//! 2. **Version Length** (8 bytes): Length of the version string in little-endian +//! 3. **Version** (variable): UTF-8 encoded semantic version string +//! 4. **Content Length** (8 bytes): Length of compressed content in little-endian +//! 5. **Content** (variable): Compressed serialized dictionary data +//! +//! # Examples +//! +//! ## Loading from XML +//! +//! ```rust +//! use odict::Dictionary; +//! +//! let dict = Dictionary::from_path("dictionary.xml")?; +//! println!("Loaded {} entries", dict.entries.len()); +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Loading from Binary Format +//! +//! ```rust +//! use odict::OpenDictionary; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! println!("Dictionary version: {}", dict.version); +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Loading from Bytes +//! +//! ```rust +//! use odict::OpenDictionary; +//! +//! let bytes = std::fs::read("dictionary.odict")?; +//! let dict = OpenDictionary::from_bytes(&bytes)?; +//! # Ok::<(), Box>(()) +//! ``` + use std::{ io::{Cursor, Read}, path::Path, @@ -20,6 +77,23 @@ use std::str::FromStr; /* Helper Methods */ /* -------------------------------------------------------------------------- */ +/// Read and validate the ODict signature from a binary stream. +/// +/// This function reads the first 5 bytes from the stream and verifies they +/// match the expected "ODICT" signature. This ensures the file is a valid +/// ODict binary format. +/// +/// # Arguments +/// +/// * `reader` - A cursor over the binary data +/// +/// # Returns +/// +/// The signature as a string if valid, or an error if invalid. +/// +/// # Errors +/// +/// Returns [`Error::InvalidSignature`] if the signature doesn't match "ODICT". fn read_signature(reader: &mut Cursor) -> crate::Result where T: AsRef<[u8]>, @@ -37,6 +111,23 @@ where Ok(String::from_utf8(signature)?) 
} +/// Read and validate the version information from a binary stream. +/// +/// This function reads the version length, then the version string, and +/// validates that it's compatible with the current library version. +/// +/// # Arguments +/// +/// * `reader` - A cursor over the binary data +/// +/// # Returns +/// +/// The parsed semantic version if compatible, or an error if incompatible. +/// +/// # Errors +/// +/// Returns [`Error::Incompatible`] if the version is not compatible with +/// the current library version. fn read_version(reader: &mut Cursor) -> crate::Result where T: AsRef<[u8]>, @@ -58,6 +149,22 @@ where Ok(version) } +/// Read and decompress the dictionary content from a binary stream. +/// +/// This function reads the content length, then the compressed content, +/// and decompresses it to obtain the raw serialized dictionary data. +/// +/// # Arguments +/// +/// * `reader` - A cursor over the binary data +/// +/// # Returns +/// +/// The decompressed content as a byte vector. +/// +/// # Errors +/// +/// Returns an error if decompression fails or if the content is corrupted. fn read_content(reader: &mut Cursor) -> crate::Result> where T: AsRef<[u8]>, @@ -76,10 +183,44 @@ where /* DictionaryReader */ /* -------------------------------------------------------------------------- */ +/// A reader for dictionary operations. +/// +/// This struct provides a namespace for dictionary reading operations, +/// though most functionality is implemented directly on the dictionary types. #[derive(Clone, Debug, Default)] pub struct DictionaryReader {} impl Dictionary { + /// Load a dictionary from an XML file. + /// + /// This method reads an XML file from the specified path and parses it + /// into a [`Dictionary`] structure. The XML must conform to the ODict + /// schema format. + /// + /// # Arguments + /// + /// * `path` - Path to the XML dictionary file + /// + /// # Returns + /// + /// A [`Dictionary`] instance containing the parsed data. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The file cannot be read + /// - The XML is malformed or doesn't conform to the ODict schema + /// - File system permissions prevent access + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let dict = Dictionary::from_path("examples/dictionary.xml")?; + /// println!("Loaded dictionary with {} entries", dict.entries.len()); + /// # Ok::<(), Box>(()) + /// ``` pub fn from_path>(path: P) -> crate::Result { let buffer = crate::fs::read_to_string(path)?; Self::from_str(&buffer) @@ -87,6 +228,38 @@ impl Dictionary { } impl OpenDictionary { + /// Load a compiled dictionary from binary data. + /// + /// This method parses binary data in the ODict format, validating the + /// signature, checking version compatibility, and decompressing the content. + /// The resulting [`OpenDictionary`] can be used for fast lookups and operations. + /// + /// # Arguments + /// + /// * `data` - Binary data in ODict format + /// + /// # Returns + /// + /// An [`OpenDictionary`] instance ready for use. + /// + /// # Errors + /// + /// Returns an error if: + /// - The signature is invalid (not an ODict file) + /// - The version is incompatible with this library + /// - The content cannot be decompressed + /// - The binary format is corrupted + /// + /// # Examples + /// + /// ```rust + /// use odict::OpenDictionary; + /// + /// let bytes = std::fs::read("dictionary.odict")?; + /// let dict = OpenDictionary::from_bytes(&bytes)?; + /// println!("Dictionary version: {}", dict.version); + /// # Ok::<(), Box>(()) + /// ``` pub fn from_bytes(data: T) -> crate::Result where T: AsRef<[u8]>, @@ -104,6 +277,39 @@ impl OpenDictionary { }) } + /// Load a compiled dictionary from a binary file. + /// + /// This method reads a binary ODict file from the specified path and + /// loads it into an [`OpenDictionary`] instance. The file path is stored + /// for reference. 
+ /// + /// # Arguments + /// + /// * `path` - Path to the binary ODict file + /// + /// # Returns + /// + /// An [`OpenDictionary`] instance with the path information preserved. + /// + /// # Errors + /// + /// Returns an error if: + /// - The file cannot be read + /// - The file is not a valid ODict binary format + /// - The version is incompatible with this library + /// - File system permissions prevent access + /// + /// # Examples + /// + /// ```rust + /// use odict::OpenDictionary; + /// + /// let dict = OpenDictionary::from_path("dictionary.odict")?; + /// if let Some(path) = &dict.path { + /// println!("Loaded from: {}", path.display()); + /// } + /// # Ok::<(), Box>(()) + /// ``` pub fn from_path>(path: P) -> crate::Result { let buffer = fs::read_to_bytes(&path)?; let mut result = Self::from_bytes(&buffer)?; diff --git a/lib/src/core/resolve.rs b/lib/src/core/resolve.rs index 9b4f541f..f03a2e45 100644 --- a/lib/src/core/resolve.rs +++ b/lib/src/core/resolve.rs @@ -1,8 +1,103 @@ +//! Entry resolution operations for ODict dictionaries. +//! +//! This module provides functionality to resolve (look up) dictionary entries +//! by their exact term. Resolution is a simple, direct lookup operation that +//! returns the entry if it exists, or None if not found. +//! +//! # Overview +//! +//! The resolve functionality allows you to: +//! - Look up entries by exact term match +//! - Get direct access to entry data structures +//! - Perform fast O(1) lookups using the underlying hash map +//! +//! # Difference from Lookup +//! +//! Resolution differs from the more complex lookup operations in that it: +//! - Only performs exact matches (no fuzzy matching or strategies) +//! - Does not follow redirects or see_also links +//! - Does not support case-insensitive fallback +//! - Returns the raw entry structure rather than wrapped results +//! +//! # Examples +//! +//! ## Basic Entry Resolution +//! +//! ```rust +//! use odict::Dictionary; +//! +//!
let dict = Dictionary::from_path("dictionary.xml")?; +//! +//! if let Some(entry) = dict.resolve("hello") { +//! println!("Found entry for 'hello': {}", entry.term); +//! } else { +//! println!("No entry found for 'hello'"); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Working with Archived Dictionaries +//! +//! ```rust +//! use odict::OpenDictionary; +//! +//! let dict = OpenDictionary::from_path("dictionary.odict")?; +//! let archived = dict.contents()?; +//! +//! if let Some(entry) = archived.resolve("world") { +//! println!("Found archived entry: {}", entry.term.as_str()); +//! } +//! # Ok::<(), Box>(()) +//! ``` + use crate::schema::{ArchivedDictionary, ArchivedEntry, Dictionary, Entry}; macro_rules! resolve { ($t:ident, $ret:ident) => { impl $t { + /// Resolve a dictionary entry by exact term match. + /// + /// This method performs a direct lookup in the dictionary's entry collection + /// using the provided term as the key. The lookup is case-sensitive and + /// requires an exact match. + /// + /// # Arguments + /// + /// * `term` - The exact term to look up in the dictionary + /// + /// # Returns + /// + /// - `Some(&Entry)` - A reference to the entry if found + /// - `None` - If no entry exists with the exact term + /// + /// # Examples + /// + /// ```rust + /// use odict::Dictionary; + /// + /// let dict = Dictionary::from_path("dictionary.xml")?; + /// + /// // Exact match lookup + /// if let Some(entry) = dict.resolve("hello") { + /// println!("Term: {}", entry.term); + /// println!("Etymologies: {}", entry.etymologies.len()); + /// } + /// + /// // Case-sensitive - this might not match if entry is "Hello" + /// let result = dict.resolve("Hello"); + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Performance + /// + /// This operation has O(1) average time complexity as it uses the underlying + /// hash map for direct key lookup. In the worst case (hash collisions), it + /// may degrade to O(n) but this is rare in practice. 
+ /// + /// # See Also + /// + /// For more advanced lookup operations with fuzzy matching, case-insensitive + /// search, and redirect following, see the [`lookup`](crate::core::lookup) module. pub fn resolve<'a>(&'a self, term: &str) -> Option<&'a $ret> { self.entries.get(term) } diff --git a/lib/src/core/version.rs b/lib/src/core/version.rs index 605da252..6e592d2a 100644 --- a/lib/src/core/version.rs +++ b/lib/src/core/version.rs @@ -1,17 +1,100 @@ +//! Semantic versioning support for ODict dictionaries. +//! +//! This module provides a semantic versioning implementation that follows the +//! [Semantic Versioning 2.0.0](https://semver.org/) specification. It's used +//! to track dictionary format versions and ensure compatibility between different +//! versions of the ODict library. +//! +//! # Overview +//! +//! The semantic versioning functionality provides: +//! - Version parsing from strings +//! - Version comparison and ordering +//! - Compatibility checking between versions +//! - Prerelease version support +//! +//! # Compatibility Rules +//! +//! Two versions are considered compatible if: +//! - They have the same major version number +//! - They have the same prerelease status (both stable, or both prerelease with the same identifier) +//! +//! # Examples +//! +//! ## Creating and Comparing Versions +//! +//! ```rust +//! use odict::SemanticVersion; +//! +//! let v1 = SemanticVersion::new(1, 2, 3, None); +//! let v2: SemanticVersion = "1.2.4".into(); +//! let v3: SemanticVersion = "2.0.0".into(); +//! +//! assert!(v1 < v2); +//! assert!(v1.is_compatible(&v2)); +//! assert!(!v1.is_compatible(&v3)); +//! ``` +//! +//! ## Working with Prerelease Versions +//! +//! ```rust +//! use odict::SemanticVersion; +//! +//! let stable: SemanticVersion = "1.0.0".into(); +//! let prerelease: SemanticVersion = "1.0.0-alpha".into(); +//! +//! assert!(prerelease < stable); +//! assert!(!stable.is_compatible(&prerelease)); +//!
``` + use std::{ cmp::Ordering, fmt::{Display, Formatter}, }; +/// A semantic version following the Semantic Versioning 2.0.0 specification. +/// +/// This struct represents a version number in the format `MAJOR.MINOR.PATCH[-PRERELEASE]` +/// where each component has specific meaning: +/// - **MAJOR**: Incremented for incompatible API changes +/// - **MINOR**: Incremented for backwards-compatible functionality additions +/// - **PATCH**: Incremented for backwards-compatible bug fixes +/// - **PRERELEASE**: Optional identifier for pre-release versions #[derive(Debug, Clone, Eq, PartialEq)] pub struct SemanticVersion { + /// Major version number (incompatible API changes) pub major: u64, + /// Minor version number (backwards-compatible additions) pub minor: u64, + /// Patch version number (backwards-compatible fixes) pub patch: u64, + /// Optional prerelease identifier (e.g., "alpha", "beta", "rc.1") pub prerelease: Option, } impl SemanticVersion { + /// Create a new semantic version. + /// + /// # Arguments + /// + /// * `major` - Major version number + /// * `minor` - Minor version number + /// * `patch` - Patch version number + /// * `prerelease` - Optional prerelease identifier + /// + /// # Examples + /// + /// ```rust + /// use odict::SemanticVersion; + /// + /// // Stable version + /// let stable = SemanticVersion::new(1, 2, 3, None); + /// assert_eq!(stable.to_string(), "1.2.3"); + /// + /// // Prerelease version + /// let prerelease = SemanticVersion::new(1, 2, 3, Some("alpha".to_string())); + /// assert_eq!(prerelease.to_string(), "1.2.3-alpha"); + /// ``` pub fn new(major: u64, minor: u64, patch: u64, prerelease: Option) -> Self { Self { major, @@ -21,10 +104,56 @@ impl SemanticVersion { } } + /// Check if this version is compatible with another version. + /// + /// Two versions are compatible if they have the same major version and + /// the same prerelease status (both stable or both prerelease with the + /// same identifier). 
+ /// + /// # Arguments + /// + /// * `other` - The version to check compatibility against + /// + /// # Returns + /// + /// `true` if the versions are compatible, `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// use odict::SemanticVersion; + /// + /// let v1_0_0: SemanticVersion = "1.0.0".into(); + /// let v1_2_3: SemanticVersion = "1.2.3".into(); + /// let v2_0_0: SemanticVersion = "2.0.0".into(); + /// let v1_0_0_alpha: SemanticVersion = "1.0.0-alpha".into(); + /// + /// assert!(v1_0_0.is_compatible(&v1_2_3)); // Same major version + /// assert!(!v1_0_0.is_compatible(&v2_0_0)); // Different major version + /// assert!(!v1_0_0.is_compatible(&v1_0_0_alpha)); // Different prerelease status + /// ``` pub fn is_compatible(&self, other: &Self) -> bool { self.major == other.major && self.prerelease.as_deref() == other.prerelease.as_deref() } + /// Convert the version to a byte vector. + /// + /// This method converts the version string representation to UTF-8 bytes, + /// which is useful for serialization and storage in binary formats. + /// + /// # Returns + /// + /// A `Vec` containing the UTF-8 encoded version string. + /// + /// # Examples + /// + /// ```rust + /// use odict::SemanticVersion; + /// + /// let version: SemanticVersion = "1.2.3".into(); + /// let bytes = version.as_bytes(); + /// assert_eq!(bytes, b"1.2.3"); + /// ``` pub fn as_bytes(&self) -> Vec { self.to_string().into_bytes() } diff --git a/lib/src/core/write.rs b/lib/src/core/write.rs index 21d33ea1..066a4069 100644 --- a/lib/src/core/write.rs +++ b/lib/src/core/write.rs @@ -1,3 +1,54 @@ +//! Dictionary writing and persistence operations for ODict. +//! +//! This module provides functionality to save compiled dictionaries to disk +//! in the binary ODict format. It handles file creation, binary serialization, +//! and path management for persistent storage. +//! +//! # Overview +//! +//! The writing functionality allows you to: +//! - Save compiled dictionaries to disk +//! 
- Customize compilation options during save +//! - Automatically update path references +//! - Ensure data integrity through proper file handling +//! +//! # File Format +//! +//! Dictionaries are saved in the binary ODict format, which includes: +//! - Format signature and version information +//! - Compressed serialized dictionary data +//! - Metadata for compatibility checking +//! +//! # Examples +//! +//! ## Basic Dictionary Saving +//! +//! ```rust +//! use odict::{Dictionary, OpenDictionary}; +//! +//! let dict = Dictionary::from_path("source.xml")?; +//! let mut compiled = dict.build()?; +//! +//! // Save with default options +//! compiled.to_disk("output.odict")?; +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Saving with Custom Options +//! +//! ```rust +//! use odict::{Dictionary, OpenDictionary, CompilerOptions, CompressOptions}; +//! +//! let dict = Dictionary::from_path("source.xml")?; +//! let mut compiled = dict.build()?; +//! +//! let options = CompilerOptions::default() +//! .with_compression(CompressOptions::default()); +//! +//! compiled.to_disk_with_options("output.odict", options)?; +//! # Ok::<(), Box>(()) +//! ``` + use std::fs::canonicalize; use std::path::Path; use std::{fs::File, io::Write}; @@ -6,10 +57,108 @@ use crate::compile::CompilerOptions; use crate::OpenDictionary; impl OpenDictionary { + /// Save the dictionary to disk using default compilation options. + /// + /// This method writes the dictionary to the specified file path in the + /// binary ODict format. It uses default compression settings and updates + /// the dictionary's internal path reference to the saved location. + /// + /// # Arguments + /// + /// * `path` - The file path where the dictionary should be saved + /// + /// # Returns + /// + /// `Ok(())` if the save operation succeeds. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The file cannot be created or written to + /// - Compilation/compression fails + /// - File system permissions prevent writing + /// - The path cannot be canonicalized + /// + /// # Side Effects + /// + /// - Creates or overwrites the file at the specified path + /// - Updates the dictionary's internal path reference + /// - Ensures all data is flushed to disk + /// + /// # Examples + /// + /// ```rust + /// use odict::{Dictionary, OpenDictionary}; + /// + /// let dict = Dictionary::from_path("source.xml")?; + /// let mut compiled = dict.build()?; + /// + /// compiled.to_disk("my_dictionary.odict")?; + /// + /// // Path is now updated + /// if let Some(path) = &compiled.path { + /// println!("Saved to: {}", path.display()); + /// } + /// # Ok::<(), Box>(()) + /// ``` pub fn to_disk>(&mut self, path: P) -> crate::Result<()> { self.to_disk_with_options(path, CompilerOptions::default()) } + /// Save the dictionary to disk with custom compilation options. + /// + /// This method provides fine-grained control over the save process, + /// allowing customization of compression settings and other compilation + /// options. The dictionary is written in the binary ODict format. + /// + /// # Arguments + /// + /// * `path` - The file path where the dictionary should be saved + /// * `options` - Compilation options to customize the save process + /// + /// # Returns + /// + /// `Ok(())` if the save operation succeeds. 
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The file cannot be created or written to + /// - Compilation fails with the specified options + /// - Compression fails + /// - File system permissions prevent writing + /// - The path cannot be canonicalized + /// + /// # Side Effects + /// + /// - Creates or overwrites the file at the specified path + /// - Updates the dictionary's internal path reference to the canonical path + /// - Ensures all data is properly flushed to disk + /// + /// # Examples + /// + /// ```rust + /// use odict::{Dictionary, OpenDictionary, CompilerOptions, CompressOptions}; + /// + /// let dict = Dictionary::from_path("source.xml")?; + /// let mut compiled = dict.build()?; + /// + /// // Use custom compression settings + /// let options = CompilerOptions::default() + /// .with_compression(CompressOptions::default()); + /// + /// compiled.to_disk_with_options("optimized.odict", options)?; + /// # Ok::<(), Box>(()) + /// ``` + /// + /// # Performance + /// + /// The save operation involves: + /// 1. Compiling the dictionary to binary format with specified options + /// 2. Creating/opening the target file + /// 3. Writing all data to disk + /// 4. Flushing to ensure data persistence + /// 5. Canonicalizing the path for accurate reference pub fn to_disk_with_options, P: AsRef>( &mut self, path: P, @@ -21,9 +170,7 @@ impl OpenDictionary { file.write_all(&buf)?; file.flush()?; - self.path = canonicalize(path)? 
- .to_str() - .map(std::path::PathBuf::from); + self.path = canonicalize(path)?.to_str().map(std::path::PathBuf::from); Ok(()) } diff --git a/node/Cargo.toml b/node/Cargo.toml index 35cf7c86..981c1c47 100644 --- a/node/Cargo.toml +++ b/node/Cargo.toml @@ -5,7 +5,7 @@ version = "1.1.1" publish = false [lib] -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] [features] default = [] diff --git a/python/Cargo.toml b/python/Cargo.toml index 46437e8f..09ed73ff 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [lib] name = "theopendictionary" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] [dependencies] pyo3 = { version = "0.27.2", features = ["either"] } diff --git a/python/src/dictionary.rs b/python/src/dictionary.rs index 993a331f..219e6285 100644 --- a/python/src/dictionary.rs +++ b/python/src/dictionary.rs @@ -9,6 +9,16 @@ use crate::{ utils::cast_error, }; +/// Compiles an ODXML string into binary `.odict` data. +/// +/// Takes an XML string conforming to the ODict XML schema and returns +/// the compiled binary representation as a byte vector. The resulting +/// bytes can be passed to [`OpenDictionary::new`] or saved to disk. +/// +/// # Errors +/// +/// Returns an error if the XML is malformed or does not conform to the +/// ODict schema. #[pyfunction] pub fn compile(xml: String) -> PyResult> { let bytes = xml @@ -19,6 +29,15 @@ pub fn compile(xml: String) -> PyResult> { Ok(bytes) } +/// The main class for working with compiled ODict dictionaries. +/// +/// An `OpenDictionary` wraps a compiled binary dictionary and provides +/// methods for looking up terms, full-text search, tokenization, and more. +/// +/// # Construction +/// +/// Create from compiled bytes or an XML string using [`OpenDictionary::new`], +/// or load from a file path or remote registry using [`OpenDictionary::load`]. 
#[pyclass] pub struct OpenDictionary { dict: odict::OpenDictionary, @@ -26,6 +45,11 @@ pub struct OpenDictionary { #[pymethods] impl OpenDictionary { + /// Loads a dictionary from a file path, alias, or remote identifier. + /// + /// This is an async method. If `dictionary` is a path to a `.odict` file, + /// it loads from disk. If it matches the format `org/lang` (e.g. `wiktionary/eng`), + /// it downloads from the remote registry. #[staticmethod] #[pyo3(signature = (dictionary, options=None))] pub fn load<'py>( @@ -50,6 +74,10 @@ impl OpenDictionary { }) } + /// Creates a dictionary from compiled binary data or directly from an XML string. + /// + /// Accepts either `bytes` (as returned by [`compile`]) or a `str` containing + /// ODXML markup. #[new] pub fn new(data: Either, String>) -> PyResult { let bytes = match data { @@ -60,6 +88,10 @@ impl OpenDictionary { Ok(Self { dict }) } + /// Saves the dictionary to disk as a `.odict` file. + /// + /// Optionally configure Brotli compression via `quality` (0–11) and + /// `window_size` (0–22). #[pyo3(signature = (path, quality=None, window_size=None))] pub fn save( &mut self, @@ -89,16 +121,24 @@ impl OpenDictionary { } } + /// The minimum rank value across all entries, or `None` if no entries have ranks. #[getter] pub fn min_rank(&self) -> PyResult> { Ok(self.dict.contents().map_err(cast_error)?.min_rank()) } + /// The maximum rank value across all entries, or `None` if no entries have ranks. #[getter] pub fn max_rank(&self) -> PyResult> { Ok(self.dict.contents().map_err(cast_error)?.max_rank()) } + /// Looks up one or more terms by exact match. + /// + /// - `query` — a single term or list of terms to look up. + /// - `split` — minimum word length for compound splitting. + /// - `follow` — follow `see_also` cross-references until an entry with etymologies is found. + /// - `insensitive` — enable case-insensitive matching. 
#[pyo3(signature = (query, split=None, follow=None, insensitive=None))] pub fn lookup( &self, @@ -135,6 +175,7 @@ impl OpenDictionary { Ok(mapped) } + /// Returns all terms defined in the dictionary, sorted alphabetically. pub fn lexicon(&self) -> PyResult> { let dict = self.dict.contents().map_err(cast_error)?; let lexicon = dict.lexicon(); @@ -142,6 +183,9 @@ impl OpenDictionary { Ok(lexicon) } + /// Creates a full-text search index for the dictionary. + /// + /// Must be called before [`OpenDictionary::search`]. #[pyo3(signature = (options=None))] pub fn index(&self, options: Option) -> PyResult<()> { let dict = self.dict.contents().map_err(cast_error)?; @@ -153,6 +197,9 @@ impl OpenDictionary { Ok(()) } + /// Runs a full-text search across the dictionary. + /// + /// Requires an index — call [`OpenDictionary::index`] first. #[pyo3(signature = (query, options=None))] pub fn search(&self, query: String, options: Option) -> PyResult> { let dict = self.dict.contents().map_err(cast_error)?; @@ -170,6 +217,14 @@ impl OpenDictionary { Ok(entries) } + /// Tokenizes text using NLP-based segmentation and matches each token against the dictionary. + /// + /// Supports Chinese, Japanese, Korean, Thai, Khmer, German, Swedish, + /// and Latin-script languages. + /// + /// - `text` — the text to tokenize. + /// - `follow` — follow `see_also` cross-references. Accepts `True`/`False` or a number (nonzero = follow). + /// - `insensitive` — enable case-insensitive matching. #[pyo3(signature = (text, follow=None, insensitive=None))] pub fn tokenize( &self, diff --git a/python/src/types/definition.rs b/python/src/types/definition.rs index 5094c218..e3c2495f 100644 --- a/python/src/types/definition.rs +++ b/python/src/types/definition.rs @@ -3,16 +3,23 @@ use structural_convert::StructuralConvert; use super::{note::Note, Example}; +/// A single definition of a word sense. +/// +/// Contains the definition text along with optional examples and notes. 
#[pyclass] #[derive(Debug, Clone, StructuralConvert)] #[convert(from(odict::schema::Definition))] pub struct Definition { + /// Optional identifier for this definition. #[pyo3(get)] pub id: Option, + /// The definition text. #[pyo3(get)] pub value: String, + /// Usage examples illustrating this definition. #[pyo3(get)] pub examples: Vec, + /// Additional notes about this definition. #[pyo3(get)] pub notes: Vec, } diff --git a/python/src/types/entry.rs b/python/src/types/entry.rs index 23b06085..78dbf4c6 100644 --- a/python/src/types/entry.rs +++ b/python/src/types/entry.rs @@ -6,18 +6,27 @@ use crate::utils::cast_error; use super::etymology::Etymology; use super::media_url::MediaURL; +/// A dictionary entry representing a single headword and its associated data. +/// +/// Each entry contains the term itself, optional ranking metadata, +/// cross-reference information, etymologies, and media attachments. #[pyclass] #[derive(Debug, Clone, StructuralConvert)] #[convert(from(odict::schema::Entry))] pub struct Entry { + /// The headword for this entry. #[pyo3(get)] pub term: String, + /// Optional frequency rank for ordering entries. #[pyo3(get)] pub rank: Option, + /// Cross-reference target term, if this entry redirects to another. #[pyo3(get)] pub see_also: Option, + /// The etymologies associated with this entry. #[pyo3(get)] pub etymologies: Vec, + /// Media URLs (audio, images, etc.) associated with this entry. #[pyo3(get)] pub media: Vec, } diff --git a/python/src/types/enums.rs b/python/src/types/enums.rs index 913cddf3..6b365cf2 100644 --- a/python/src/types/enums.rs +++ b/python/src/types/enums.rs @@ -1,16 +1,23 @@ use pyo3::prelude::*; use structural_convert::StructuralConvert; +/// A wrapper for ODict enumeration values (e.g. part of speech, pronunciation kind). +/// +/// ODict enums are represented as string triples: the enum name, +/// the variant name, and the variant's string value. 
#[pyclass] #[derive(Debug, PartialEq, Clone, StructuralConvert)] #[convert(from(internal::EnumWrapper))] pub struct EnumWrapper { + /// The enum type name (e.g. `"PartOfSpeech"`). #[pyo3(get)] pub name: String, + /// The variant name (e.g. `"Noun"`). #[pyo3(get)] pub variant: String, + /// The string value of the variant (e.g. `"n"`). #[pyo3(get)] pub value: String, } diff --git a/python/src/types/etymology.rs b/python/src/types/etymology.rs index bf06b166..f3a1b540 100644 --- a/python/src/types/etymology.rs +++ b/python/src/types/etymology.rs @@ -5,15 +5,23 @@ use pyo3::prelude::*; use super::pronunciation::Pronunciation; use super::sense::Sense; +/// An etymology grouping for a dictionary entry. +/// +/// Etymologies group together senses that share a common word origin. +/// Each etymology can have its own pronunciations and description. #[pyclass] #[derive(Clone)] pub struct Etymology { + /// Optional identifier for this etymology. #[pyo3(get)] pub id: Option, + /// Pronunciations associated with this etymology. #[pyo3(get)] pub pronunciations: Vec, + /// Optional description of the word origin. #[pyo3(get)] pub description: Option, + /// The senses (meanings) under this etymology. #[pyo3(get)] pub senses: Vec, } diff --git a/python/src/types/example.rs b/python/src/types/example.rs index 10047079..a2615f4f 100644 --- a/python/src/types/example.rs +++ b/python/src/types/example.rs @@ -3,16 +3,22 @@ use crate::types::{Pronunciation, Translation}; use pyo3::prelude::*; use structural_convert::StructuralConvert; +/// A usage example illustrating a definition. +/// +/// Examples can optionally include translations and pronunciations. #[pyclass] #[derive(Debug, Clone, StructuralConvert)] #[convert(from(odict::schema::Example))] pub struct Example { + /// The example text. #[pyo3(get)] pub value: String, + /// Translations of this example into other languages. #[pyo3(get)] pub translations: Vec, + /// Pronunciations for this example. 
#[pyo3(get)] pub pronunciations: Vec, } diff --git a/python/src/types/form.rs b/python/src/types/form.rs index 198527c8..5e1862ca 100644 --- a/python/src/types/form.rs +++ b/python/src/types/form.rs @@ -3,15 +3,22 @@ use pyo3::prelude::*; use super::enums::EnumWrapper; +/// An inflected or alternate form of a word. +/// +/// Forms represent morphological variants such as plurals, conjugations, +/// or other inflections. #[pyclass] #[derive(Clone, Debug)] pub struct Form { + /// The inflected form text. #[pyo3(get)] pub term: String, + /// The kind of form (e.g. plural, past tense), or `None`. #[pyo3(get, set)] pub kind: Option, + /// Tags for categorizing this form. #[pyo3(get)] pub tags: Vec, } diff --git a/python/src/types/group.rs b/python/src/types/group.rs index 60c9b265..7f9c951b 100644 --- a/python/src/types/group.rs +++ b/python/src/types/group.rs @@ -3,14 +3,21 @@ use structural_convert::StructuralConvert; use super::definition::Definition; +/// A named group of related definitions. +/// +/// Groups allow organizing multiple definitions under a shared description, +/// such as grouping definitions by semantic domain. #[pyclass] #[derive(Debug, Clone, StructuralConvert)] #[convert(from(odict::schema::Group))] pub struct Group { + /// Optional identifier for this group. #[pyo3(get)] pub id: Option, + /// A description of what this group of definitions has in common. #[pyo3(get)] pub description: String, + /// The definitions within this group. #[pyo3(get)] pub definitions: Vec, } diff --git a/python/src/types/index.rs b/python/src/types/index.rs index 535bdb0b..ab21daef 100644 --- a/python/src/types/index.rs +++ b/python/src/types/index.rs @@ -1,14 +1,18 @@ use pyo3::prelude::*; +/// Options for configuring full-text index creation. #[pyclass] #[derive(Clone)] pub struct IndexOptions { + /// Custom directory for storing the index. #[pyo3(get, set)] pub directory: Option, + /// Memory arena size per thread in bytes (must be >15 MB). 
#[pyo3(get, set)] pub memory: Option, + /// Whether to overwrite an existing index. #[pyo3(get, set)] pub overwrite: Option, } diff --git a/python/src/types/load.rs b/python/src/types/load.rs index 51ee8d98..b2f0014c 100644 --- a/python/src/types/load.rs +++ b/python/src/types/load.rs @@ -1,12 +1,16 @@ use pyo3::prelude::*; +/// Options for loading dictionaries from remote registries. #[pyclass] #[derive(PartialEq, Default, Clone, Eq)] pub struct RemoteLoadOptions { + /// Custom output directory for downloaded files. #[pyo3(get, set)] pub out_dir: Option, + /// Whether to cache downloaded dictionaries locally. #[pyo3(get, set)] pub caching: Option, + /// Number of download retries on failure. #[pyo3(get, set)] pub retries: Option, } @@ -20,11 +24,14 @@ impl RemoteLoadOptions { } } +/// Options for loading a dictionary from a file path, alias, or remote registry. #[pyclass] #[derive(PartialEq, Default, Clone, Eq)] pub struct LoadOptions { + /// Custom configuration directory. #[pyo3(get, set)] pub config_dir: Option, + /// Options for remote dictionary loading. #[pyo3(get, set)] pub remote: Option, } diff --git a/python/src/types/lookup.rs b/python/src/types/lookup.rs index 55c5b1ff..4888abca 100644 --- a/python/src/types/lookup.rs +++ b/python/src/types/lookup.rs @@ -2,15 +2,19 @@ use pyo3::prelude::*; use super::Entry; +/// Options for configuring term lookups. #[pyclass] #[derive(Clone)] pub struct LookupOptions { + /// Minimum word length for compound splitting. #[pyo3(get, set)] pub split: Option, + /// Whether to follow `see_also` cross-references. #[pyo3(get, set)] pub follow: Option, + /// Whether to enable case-insensitive matching. #[pyo3(get, set)] pub insensitive: Option, } @@ -58,11 +62,17 @@ impl From for odict::lookup::LookupOptions { } } +/// The result of a dictionary lookup. +/// +/// Contains the matched entry and, if a `see_also` redirect was followed, +/// the original entry that initiated the redirect. 
#[pyclass] #[derive(Debug, Clone)] pub struct LookupResult { + /// The matched dictionary entry. #[pyo3(get)] pub entry: Entry, + /// The original entry if a `see_also` redirect was followed, or `None`. #[pyo3(get)] pub directed_from: Option, } diff --git a/python/src/types/media_url.rs b/python/src/types/media_url.rs index f77fbf86..296e00df 100644 --- a/python/src/types/media_url.rs +++ b/python/src/types/media_url.rs @@ -2,16 +2,20 @@ use pyo3::prelude::*; use std::fmt; use structural_convert::StructuralConvert; +/// A reference to an external media resource (audio, image, etc.). #[pyclass] #[derive(Clone, Debug, StructuralConvert)] #[convert(from(odict::schema::MediaURL))] pub struct MediaURL { + /// URL or path to the media file. #[pyo3(get)] pub src: String, + /// MIME type (e.g. `audio/mpeg`), or `None`. #[pyo3(get)] pub mime_type: Option, + /// Human-readable description of the media. #[pyo3(get)] pub description: Option, } diff --git a/python/src/types/note.rs b/python/src/types/note.rs index 811d7706..82499923 100644 --- a/python/src/types/note.rs +++ b/python/src/types/note.rs @@ -3,14 +3,21 @@ use structural_convert::StructuralConvert; use super::Example; +/// An additional note attached to a definition. +/// +/// Notes provide supplementary information such as usage guidance, +/// historical context, or grammatical remarks. #[pyclass] #[derive(Debug, Clone, StructuralConvert)] #[convert(from(odict::schema::Note))] pub struct Note { + /// Optional identifier for this note. #[pyo3(get)] pub id: Option, + /// The note text. #[pyo3(get)] pub value: String, + /// Examples associated with this note. #[pyo3(get)] pub examples: Vec, } diff --git a/python/src/types/pronunciation.rs b/python/src/types/pronunciation.rs index 8762971a..f6100f04 100644 --- a/python/src/types/pronunciation.rs +++ b/python/src/types/pronunciation.rs @@ -5,15 +5,22 @@ use super::media_url::MediaURL; use internal::ToEnumWrapper; +/// A pronunciation entry for a word or etymology. 
+/// +/// Represents how a word is pronounced in a given notation system +/// (e.g. IPA, Pinyin), with optional audio media. #[pyclass] #[derive(Clone, Debug)] pub struct Pronunciation { + /// The pronunciation system (e.g. IPA, Pinyin), or `None`. #[pyo3(get)] pub kind: Option, + /// The pronunciation notation string. #[pyo3(get)] pub value: String, + /// Audio media URLs for this pronunciation. #[pyo3(get)] pub media: Vec, } diff --git a/python/src/types/save.rs b/python/src/types/save.rs index 28a67eb3..424f4e1f 100644 --- a/python/src/types/save.rs +++ b/python/src/types/save.rs @@ -1,11 +1,14 @@ use pyo3::prelude::*; +/// Brotli compression options for saving dictionaries. #[pyclass] #[derive(PartialEq, Default, Clone, Eq)] pub struct CompressOptions { + /// Compression quality level (0–11). #[pyo3(get, set)] pub quality: Option, - + + /// Compression window size (0–22). #[pyo3(get, set)] pub window_size: Option, } @@ -22,9 +25,11 @@ impl CompressOptions { } } +/// Options for saving a dictionary to disk. #[pyclass] #[derive(PartialEq, Default, Clone, Eq)] pub struct SaveOptions { + /// Optional Brotli compression settings. #[pyo3(get, set)] pub compress: Option, } diff --git a/python/src/types/search.rs b/python/src/types/search.rs index 6e24b897..b893f5c6 100644 --- a/python/src/types/search.rs +++ b/python/src/types/search.rs @@ -1,17 +1,22 @@ use pyo3::prelude::*; +/// Options for configuring full-text search. #[pyclass] #[derive(Clone)] pub struct SearchOptions { + /// Custom directory for the search index. #[pyo3(get, set)] pub directory: Option, + /// Relevance score threshold for filtering results. #[pyo3(get, set)] pub threshold: Option, + /// Whether to automatically create an index if one does not exist. #[pyo3(get, set)] pub autoindex: Option, + /// Maximum number of results to return. 
#[pyo3(get, set)] pub limit: Option, } diff --git a/python/src/types/sense.rs b/python/src/types/sense.rs index 29b736dc..d909f42f 100644 --- a/python/src/types/sense.rs +++ b/python/src/types/sense.rs @@ -7,19 +7,30 @@ use super::{ definition::Definition, enums::EnumWrapper, form::Form, group::Group, translation::Translation, }; +/// A word sense — a specific meaning grouped by part of speech. +/// +/// Senses represent distinct meanings of a word under a given etymology. +/// Each sense has a part of speech and contains definitions (or definition groups), +/// along with optional tags, translations, and inflected forms. #[pyclass] #[derive(Debug, Clone)] pub struct Sense { + /// The part of speech for this sense (e.g. noun, verb, adjective). #[pyo3(get)] pub pos: EnumWrapper, + /// Optional lemma reference linking to another entry. #[pyo3(get)] pub lemma: Option, + /// Definitions or definition groups under this sense. #[pyo3(get)] pub definitions: Vec>, + /// Tags for categorizing or filtering this sense. #[pyo3(get)] pub tags: Vec, + /// Translations of this sense into other languages. #[pyo3(get)] pub translations: Vec, + /// Inflected forms of the word under this sense. #[pyo3(get)] pub forms: Vec
, } diff --git a/python/src/types/token.rs b/python/src/types/token.rs index ff0fe16f..9da91ba2 100644 --- a/python/src/types/token.rs +++ b/python/src/types/token.rs @@ -3,21 +3,32 @@ use pyo3::prelude::*; use super::LookupResult; +/// A token produced by NLP-based text segmentation. +/// +/// Each token represents a segment of the input text, with metadata about +/// its position, detected language and script, and any matching dictionary entries. #[pyclass] #[derive(Debug)] pub struct Token { + /// The original token text (lemma form). #[pyo3(get)] pub lemma: String, + /// Detected language code (e.g. `"eng"`), or `None` if unknown. #[pyo3(get)] pub language: Option, + /// Matched dictionary entries for this token. #[pyo3(get)] pub entries: Vec, + /// The token kind (e.g. `"Word"`, `"Punctuation"`). #[pyo3(get)] pub kind: String, + /// Detected script name (e.g. `"Latin"`, `"Han"`). #[pyo3(get)] pub script: String, + /// Start byte offset in the original text. #[pyo3(get)] pub start: usize, + /// End byte offset in the original text. #[pyo3(get)] pub end: usize, } diff --git a/python/src/types/tokenize.rs b/python/src/types/tokenize.rs index fdcc2fe1..43cf11af 100644 --- a/python/src/types/tokenize.rs +++ b/python/src/types/tokenize.rs @@ -1,12 +1,15 @@ use either::Either; use pyo3::prelude::*; +/// Options for configuring text tokenization. #[pyclass] #[derive(Clone)] pub struct TokenizeOptions { + /// Whether to follow `see_also` cross-references. Accepts `True`/`False` or a number (nonzero = follow). #[pyo3(get, set)] pub follow: Option>, + /// Whether to enable case-insensitive matching. 
#[pyo3(get, set)] pub insensitive: Option, } diff --git a/python/src/types/translation.rs b/python/src/types/translation.rs index b0dcede2..37663d49 100644 --- a/python/src/types/translation.rs +++ b/python/src/types/translation.rs @@ -1,13 +1,16 @@ use pyo3::prelude::*; use structural_convert::StructuralConvert; +/// A translation of a word, definition, or example into another language. #[pyclass] #[derive(Debug, Clone, StructuralConvert)] #[convert(from(odict::schema::Translation))] pub struct Translation { + /// The ISO 639-3 language code (e.g. `"fra"`, `"deu"`). #[pyo3(get)] pub lang: String, + /// The translated text. #[pyo3(get)] pub value: String, } diff --git a/scripts/rustdoc-to-md.py b/scripts/rustdoc-to-md.py new file mode 100755 index 00000000..7d29bb6c --- /dev/null +++ b/scripts/rustdoc-to-md.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +""" +Extracts Rust doc comments (///) from source files and generates Markdown documentation. + +This script parses Rust source files for structs, functions, and their fields/methods, +then formats the extracted documentation as Markdown suitable for inclusion in a +documentation site. 
+ +Usage: + python scripts/rustdoc-to-md.py python/src # Generate docs for Python bindings + python scripts/rustdoc-to-md.py node/src # Generate docs for Node bindings + +Alternatively, use cargo rustdoc with JSON output and the `rustdoc-md` tool: + RUSTC_BOOTSTRAP=1 cargo rustdoc -p odict_python -- -Z unstable-options --output-format json + rustdoc-md target/doc/theopendictionary.json -o docs/python-api.md +""" + +import re +import sys +from dataclasses import dataclass, field +from pathlib import Path + + +@dataclass +class DocItem: + """A documented item extracted from Rust source.""" + name: str + kind: str # "struct", "function", "method", "field", "getter" + doc: str + signature: str = "" + fields: list = field(default_factory=list) + methods: list = field(default_factory=list) + + +def extract_doc_comment(lines: list[str], end_idx: int) -> str: + """Extract consecutive /// doc comments ending at or before the given line. + + Skips over attribute lines (#[...]) to find the doc comment block. 
+ """ + # First, skip backwards over attribute lines + i = end_idx + while i >= 0: + stripped = lines[i].strip() + if stripped.startswith("///"): + break + elif stripped.startswith("#[") or stripped == "": + i -= 1 + continue + else: + return "" + i -= 1 + + # Now extract the doc comment lines + doc_lines = [] + while i >= 0 and lines[i].strip().startswith("///"): + comment = lines[i].strip().removeprefix("///") + # Preserve leading space for indented content, but strip the first space + if comment.startswith(" "): + comment = comment[1:] + doc_lines.append(comment) + i -= 1 + doc_lines.reverse() + return "\n".join(doc_lines).strip() + + +def parse_rust_file(filepath: Path) -> list[DocItem]: + """Parse a Rust file and extract documented items.""" + content = filepath.read_text() + lines = content.splitlines() + items = [] + + i = 0 + while i < len(lines): + line = lines[i].strip() + + # Detect #[pyfunction] + if line == "#[pyfunction]": + doc = extract_doc_comment(lines, i - 1) + # Find the function signature + j = i + 1 + while j < len(lines) and not lines[j].strip().startswith("pub fn "): + j += 1 + if j < len(lines): + sig = extract_fn_signature(lines, j) + name = re.search(r"pub fn (\w+)", lines[j]) + if name: + items.append(DocItem( + name=name.group(1), + kind="function", + doc=doc, + signature=sig, + )) + + # Detect #[pyclass] or #[napi(object)] + if line == "#[pyclass]" or line.startswith("#[pyclass") or line.startswith("#[napi"): + # Only match struct-level napi, not method-level + if line.startswith("#[napi") and "object" not in line and "constructor" not in line: + i += 1 + continue + + doc = extract_doc_comment(lines, i - 1) + # Find the struct name + j = i + 1 + while j < len(lines) and not lines[j].strip().startswith("pub struct "): + j += 1 + if j < len(lines): + name_match = re.search(r"pub struct (\w+)", lines[j]) + if name_match: + struct_name = name_match.group(1) + struct_item = DocItem( + name=struct_name, + kind="struct", + doc=doc, + ) + # 
Extract fields from struct body + if lines[j].strip().endswith("{"): + k = j + 1 + while k < len(lines) and not lines[k].strip().startswith("}"): + field_line = lines[k].strip() + if field_line.startswith("pub "): + # Look back for doc comment, skipping #[pyo3(...)] attrs + field_doc = extract_doc_comment(lines, k - 1) + field_match = re.match( + r"pub\s+(\w+):\s*(.+?),?\s*$", field_line + ) + if field_match: + struct_item.fields.append(DocItem( + name=field_match.group(1), + kind="field", + doc=field_doc, + signature=field_match.group(2), + )) + k += 1 + items.append(struct_item) + + # Detect #[pymethods] impl blocks + if line == "#[pymethods]": + j = i + 1 + while j < len(lines) and not lines[j].strip().startswith("impl "): + j += 1 + if j < len(lines): + impl_match = re.search(r"impl (\w+)", lines[j]) + if impl_match: + impl_name = impl_match.group(1) + # Find the matching struct in items + target = None + for item in items: + if item.name == impl_name and item.kind == "struct": + target = item + break + + # Parse methods in the impl block + brace_depth = 0 + k = j + while k < len(lines): + if "{" in lines[k]: + brace_depth += lines[k].count("{") + if "}" in lines[k]: + brace_depth -= lines[k].count("}") + if brace_depth == 0 and k > j: + break + + mline = lines[k].strip() + + # Check for pub fn (but skip dunder methods) + if mline.startswith("pub fn ") and "__" not in mline: + # Extract doc comment, skipping attribute lines + method_doc = extract_doc_comment(lines, k - 1) + + # Check for #[getter] in preceding attribute lines + is_getter = False + is_staticmethod = False + is_new = False + for back in range(max(0, k - 10), k): + attr = lines[back].strip() + if attr == "#[getter]": + is_getter = True + elif attr == "#[staticmethod]": + is_staticmethod = True + elif attr == "#[new]": + is_new = True + elif attr.startswith("pub fn ") or (attr.startswith("///") and back < k - 1): + break # stop looking back past another function or doc + + sig = 
extract_fn_signature(lines, k) + + name_match = re.search(r"pub fn (\w+)", mline) + if name_match and method_doc: + kind = "getter" if is_getter else ("static" if is_staticmethod else "method") + if is_new: + kind = "constructor" + + method = DocItem( + name=name_match.group(1), + kind=kind, + doc=method_doc, + signature=sig, + ) + if target: + target.methods.append(method) + k += 1 + + i += 1 + + return items + + +def extract_fn_signature(lines: list[str], start: int) -> str: + """Extract a function signature starting from the given line.""" + sig_lines = [] + paren_depth = 0 + i = start + while i < len(lines): + line = lines[i] + sig_lines.append(line.rstrip()) + paren_depth += line.count("(") - line.count(")") + if paren_depth <= 0 and ")" in line: + # Check for return type on next line + if i + 1 < len(lines) and lines[i + 1].strip().startswith("->"): + sig_lines.append(lines[i + 1].rstrip()) + break + i += 1 + + sig = " ".join(l.strip() for l in sig_lines) + # Clean up: extract just the function part + match = re.search(r"(pub fn \w+.*?)(?:\s*\{|\s*where)", sig) + if match: + return match.group(1).strip() + match = re.search(r"(pub fn \w+[^{]*)", sig) + if match: + return match.group(1).strip().rstrip("{").strip() + return sig + + +def rust_type_to_display(ty: str) -> str: + """Convert a Rust type to a more readable display format.""" + ty = ty.strip().rstrip(",") + # Option -> T | None + m = re.match(r"Option<(.+)>$", ty) + if m: + inner = rust_type_to_display(m.group(1)) + return f"{inner} | None" + # Vec -> list[T] + m = re.match(r"Vec<(.+)>$", ty) + if m: + inner = rust_type_to_display(m.group(1)) + return f"list[{inner}]" + # Either -> A | B + m = re.match(r"Either<(.+),\s*(.+)>$", ty) + if m: + a = rust_type_to_display(m.group(1)) + b = rust_type_to_display(m.group(2)) + return f"{a} | {b}" + # Basic type mappings + mappings = { + "String": "str", + "&str": "str", + "u32": "int", + "u64": "int", + "i32": "int", + "i64": "int", + "usize": "int", + "bool": "bool", + "f32": "float", + "f64": 
"float", + } + return mappings.get(ty, ty) + + +def items_to_markdown(items: list[DocItem], title: str) -> str: + """Convert extracted items to Markdown format.""" + md = [] + md.append(f"# {title}\n") + md.append("*Auto-generated from Rust doc comments.*\n") + md.append("---\n") + + # Separate functions and structs + functions = [i for i in items if i.kind == "function"] + structs = [i for i in items if i.kind == "struct"] + + if functions: + md.append("## Functions\n") + for func in functions: + md.append(f"### `{func.name}()`\n") + if func.doc: + md.append(f"{func.doc}\n") + + for struct in structs: + md.append(f"## `{struct.name}`\n") + if struct.doc: + md.append(f"{struct.doc}\n") + + # Constructors + constructors = [m for m in struct.methods if m.kind == "constructor"] + if constructors: + md.append("### Constructors\n") + for c in constructors: + md.append(f"#### `{struct.name}()`\n") + if c.doc: + md.append(f"{c.doc}\n") + + # Static methods + statics = [m for m in struct.methods if m.kind == "static"] + for s in statics: + md.append(f"### `{struct.name}.{s.name}()`\n") + if s.doc: + md.append(f"{s.doc}\n") + + # Properties (fields + getters) + getters = [m for m in struct.methods if m.kind == "getter"] + if struct.fields or getters: + md.append("### Properties\n") + md.append("| Property | Type | Description |") + md.append("|----------|------|-------------|") + for f in struct.fields: + ty = rust_type_to_display(f.signature) + doc = f.doc.replace("\n", " ") if f.doc else "" + md.append(f"| `{f.name}` | `{ty}` | {doc} |") + for g in getters: + doc = g.doc.replace("\n", " ") if g.doc else "" + md.append(f"| `{g.name}` | — | {doc} |") + md.append("") + + # Methods (non-getter, non-constructor) + methods = [m for m in struct.methods if m.kind == "method"] + if methods: + md.append("### Methods\n") + for m in methods: + md.append(f"#### `{m.name}()`\n") + if m.doc: + md.append(f"{m.doc}\n") + + md.append("---\n") + + return "\n".join(md) + + +def main(): + if 
len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} <src_dir> [output_file]") + print(f"Example: {sys.argv[0]} python/src docs/python-api-generated.md") + sys.exit(1) + + src_dir = Path(sys.argv[1]) + output_file = sys.argv[2] if len(sys.argv) > 2 else None + + if not src_dir.exists(): + print(f"Error: directory {src_dir} does not exist") + sys.exit(1) + + # Collect all .rs files + rs_files = sorted(src_dir.rglob("*.rs")) + all_items = [] + + for rs_file in rs_files: + items = parse_rust_file(rs_file) + all_items.extend(items) + + if not all_items: + print("No documented items found.") + sys.exit(1) + + # Determine title from directory name + dir_name = src_dir.parent.name if src_dir.name == "src" else src_dir.name + title_map = {"python": "Python API", "node": "JavaScript API"} + title = title_map.get(dir_name, f"{dir_name} API") + + md = items_to_markdown(all_items, title) + + if output_file: + Path(output_file).write_text(md) + print(f"Generated {output_file} ({len(all_items)} items)") + else: + print(md) + + +if __name__ == "__main__": + main()