diff --git a/docs/en/_static/css/config_generator.css b/docs/en/_static/css/config_generator.css new file mode 100644 index 0000000000..2c41719332 --- /dev/null +++ b/docs/en/_static/css/config_generator.css @@ -0,0 +1,216 @@ +/* ================================================================ + LMDeploy Interactive Configuration Generator – pill-bar layout + Matches the SGLang Cookbook segmented-control style + ================================================================ */ + +/* Wrapper: full content width, no card background */ +.cg-wrapper { + width: 100%; + margin: 0; + padding: 0; +} + +/* ── Dimension row ─────────────────────────────────────────────── */ +.cg-row { + margin-bottom: 1.25em; +} + +.cg-label { + font-weight: 600; + font-size: 0.95em; + margin-bottom: 0.4em; + color: var(--pst-color-text-base, #24292e); +} + +/* ── Pill bar (segmented control) ──────────────────────────────── */ +.cg-pill-bar { + display: flex; + flex-wrap: wrap; + width: 100%; + border: 1px solid var(--pst-color-border, #d1d5db); + border-radius: 6px; + overflow: hidden; + background: var(--pst-color-surface, #ffffff); +} + +.cg-pill { + flex: 1 1 0; + min-width: 0; + padding: 0.55em 0.4em; + margin: 0; + border: none; + border-right: 1px solid var(--pst-color-border, #d1d5db); + background: transparent; + color: var(--pst-color-text-base, #24292e); + font-size: 0.88em; + font-weight: 500; + cursor: pointer; + text-align: center; + transition: background 0.15s ease, color 0.15s ease; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + line-height: 1.4; +} + +.cg-pill:last-child { + border-right: none; +} + +.cg-pill:hover { + background: rgba(3, 102, 214, 0.08); +} + +.cg-pill.active { + background: #0366d6; + color: #ffffff; + font-weight: 600; +} + +.cg-pill.active:hover { + background: #0256c2; +} + +/* ── Command output section ────────────────────────────────────── */ +.cg-command-section { + margin-top: 1.5em; +} + +.cg-command-label { + 
font-weight: 600; + font-size: 0.95em; + margin-bottom: 0.4em; + color: var(--pst-color-text-base, #24292e); +} + +.cg-command-box { + background: #1e1e1e; + border-radius: 6px; + padding: 1em 1em 1em 1.2em; + position: relative; + width: 100%; + box-sizing: border-box; +} + +.cg-command-box pre { + margin: 0; + padding: 0; + padding-right: 4.5em; /* space for the copy button */ + background: transparent; + overflow-x: auto; +} + +.cg-command-box code { + color: #d4d4d4; + font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace; + font-size: 0.88em; + line-height: 1.6; + white-space: pre; +} + +.cg-copy-btn { + position: absolute; + top: 0.6em; + right: 0.6em; + background: #0366d6; + color: #ffffff; + border: none; + border-radius: 4px; + padding: 0.35em 0.9em; + cursor: pointer; + font-size: 0.8em; + font-weight: 500; + transition: background 0.2s ease; +} + +.cg-copy-btn:hover { + background: #0256c2; +} + +.cg-copy-btn:active { + background: #014a9e; +} + +/* ── Responsive: stack pills on narrow screens ─────────────────── */ +@media (max-width: 640px) { + .cg-pill-bar { + flex-direction: column; + } + + .cg-pill { + border-right: none; + border-bottom: 1px solid var(--pst-color-border, #d1d5db); + } + + .cg-pill:last-child { + border-bottom: none; + } +} + +/* ── Dark-mode overrides (sphinx-book-theme data-theme) ────────── */ +html[data-theme="dark"] .cg-label, +html[data-theme="dark"] .cg-command-label { + color: #f0f6fc; +} + +html[data-theme="dark"] .cg-pill-bar { + border-color: #30363d; + background: #161b22; +} + +html[data-theme="dark"] .cg-pill { + color: #c9d1d9; + border-color: #30363d; +} + +html[data-theme="dark"] .cg-pill:hover { + background: rgba(88, 166, 255, 0.12); +} + +html[data-theme="dark"] .cg-pill.active { + background: #1f6feb; + color: #ffffff; +} + +html[data-theme="dark"] .cg-command-box { + background: #0d1117; +} + +html[data-theme="dark"] .cg-command-box code { + color: #c9d1d9; +} + +/* Also handle 
prefers-color-scheme for themes without data-theme */ +@media (prefers-color-scheme: dark) { + .cg-label, + .cg-command-label { + color: #f0f6fc; + } + + .cg-pill-bar { + border-color: #30363d; + background: #161b22; + } + + .cg-pill { + color: #c9d1d9; + border-color: #30363d; + } + + .cg-pill:hover { + background: rgba(88, 166, 255, 0.12); + } + + .cg-pill.active { + background: #1f6feb; + color: #ffffff; + } + + .cg-command-box { + background: #0d1117; + } + + .cg-command-box code { + color: #c9d1d9; + } +} diff --git a/docs/en/_static/js/config_generator.js b/docs/en/_static/js/config_generator.js new file mode 100644 index 0000000000..e81745a19a --- /dev/null +++ b/docs/en/_static/js/config_generator.js @@ -0,0 +1,165 @@ +// LMDeploy Interactive Configuration Generator — Generic Engine +// Model-specific configurations are loaded from js/models/*.js via +// the window.LMDeployModelConfigs global registry. +(function() { + 'use strict'; + + function initConfigGenerator() { + var container = document.getElementById('lmdeploy-config-generator'); + if (!container) return; + + // ── Read model config from registry ────────────────────────── + var configKey = container.getAttribute('data-model-config') || 'qwen3'; + var configs = window.LMDeployModelConfigs || {}; + var config = configs[configKey]; + if (!config) { + container.textContent = 'Unknown model config: ' + configKey + + '. 
Available: ' + Object.keys(configs).join(', '); + return; + } + + // ── TP estimation (generic) ───────────────────────────────── + function getRecommendedTP(sel) { + var mem = (config.gpuMem || {})[sel.hardware] || 80; + var need = (config.modelMem || {})[sel.model_size] || 16; + if (sel.quantization === 'awq' || sel.quantization === 'gptq') { + need *= 0.3; + } else if (sel.quantization === 'fp8') { + need *= 0.55; + } + var tp = 1; + while (tp * mem < need * 1.15 && tp < 8) { + tp *= 2; + } + return tp; + } + + // ── Generate command ──────────────────────────────────────── + function generateCommand() { + var sel = {}; + container.querySelectorAll('.cg-pill-bar').forEach(function(bar) { + var key = bar.getAttribute('data-key'); + var active = bar.querySelector('.cg-pill.active'); + if (active) sel[key] = active.getAttribute('data-value'); + }); + + var modelPath = config.buildModelPath(sel); + var tp = getRecommendedTP(sel); + var parts = ['lmdeploy serve api_server ' + modelPath]; + + if (tp > 1) parts.push('--tp ' + tp); + + var extraFlags = config.buildExtraFlags ? 
config.buildExtraFlags(sel) : []; + parts = parts.concat(extraFlags); + + if (parts.length <= 2) return parts.join(' '); + return parts[0] + ' \\\n' + + parts.slice(1).map(function(p) { return ' ' + p; }).join(' \\\n'); + } + + // ── Update command display ────────────────────────────────── + function updateCommand() { + var el = container.querySelector('.cg-generated-command'); + if (el) el.textContent = generateCommand(); + } + + // ── Render a single dimension row ─────────────────────────── + function renderDimension(dim) { + var row = document.createElement('div'); + row.className = 'cg-row'; + + var label = document.createElement('div'); + label.className = 'cg-label'; + label.textContent = dim.label; + row.appendChild(label); + + var bar = document.createElement('div'); + bar.className = 'cg-pill-bar'; + bar.setAttribute('data-key', dim.key); + + dim.options.forEach(function(opt) { + var pill = document.createElement('button'); + pill.className = 'cg-pill'; + pill.setAttribute('data-value', opt.value); + pill.textContent = opt.label; + if (opt.value === dim.default) pill.classList.add('active'); + + pill.addEventListener('click', function() { + bar.querySelectorAll('.cg-pill').forEach(function(p) { + p.classList.remove('active'); + }); + pill.classList.add('active'); + updateCommand(); + }); + + bar.appendChild(pill); + }); + + row.appendChild(bar); + return row; + } + + // ── Build the full UI ─────────────────────────────────────── + var wrapper = document.createElement('div'); + wrapper.className = 'cg-wrapper'; + + config.dimensions.forEach(function(dim) { + wrapper.appendChild(renderDimension(dim)); + }); + + // Command output section + var cmdSection = document.createElement('div'); + cmdSection.className = 'cg-command-section'; + + var cmdLabel = document.createElement('div'); + cmdLabel.className = 'cg-command-label'; + cmdLabel.textContent = 'Generated Command'; + cmdSection.appendChild(cmdLabel); + + var cmdBox = document.createElement('div'); + 
cmdBox.className = 'cg-command-box'; + + var pre = document.createElement('pre'); + var code = document.createElement('code'); + code.className = 'cg-generated-command'; + pre.appendChild(code); + cmdBox.appendChild(pre); + + var copyBtn = document.createElement('button'); + copyBtn.className = 'cg-copy-btn'; + copyBtn.textContent = 'Copy'; + copyBtn.addEventListener('click', function() { + var text = code.textContent; + navigator.clipboard.writeText(text).then(function() { + copyBtn.textContent = 'Copied!'; + setTimeout(function() { copyBtn.textContent = 'Copy'; }, 2000); + }).catch(function() { + // Fallback for older browsers + var ta = document.createElement('textarea'); + ta.value = text; + ta.style.position = 'fixed'; + ta.style.left = '-9999px'; + document.body.appendChild(ta); + ta.select(); + document.execCommand('copy'); + document.body.removeChild(ta); + copyBtn.textContent = 'Copied!'; + setTimeout(function() { copyBtn.textContent = 'Copy'; }, 2000); + }); + }); + cmdBox.appendChild(copyBtn); + + cmdSection.appendChild(cmdBox); + wrapper.appendChild(cmdSection); + + container.appendChild(wrapper); + updateCommand(); + } + + // Initialize when DOM is ready + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', initConfigGenerator); + } else { + initConfigGenerator(); + } +})(); diff --git a/docs/en/_static/js/models/deepseek.js b/docs/en/_static/js/models/deepseek.js new file mode 100644 index 0000000000..5c386d83ff --- /dev/null +++ b/docs/en/_static/js/models/deepseek.js @@ -0,0 +1,68 @@ +// models/deepseek.js — DeepSeek model configuration for LMDeploy Config Generator +(function() { + 'use strict'; + window.LMDeployModelConfigs = window.LMDeployModelConfigs || {}; + + window.LMDeployModelConfigs['deepseek'] = { + name: 'DeepSeek', + + dimensions: [ + { + key: 'hardware', label: 'Hardware Platform', default: 'H800', + options: [ + { value: 'A100', label: 'A100(80G)' }, + { value: 'H800', label: 'H800(80G)' }, + { 
value: 'H200', label: 'H200(140G)' }
+        ]
+      },
+      {
+        key: 'model_size', label: 'Model Version', default: 'V3',
+        options: [
+          { value: 'V2-Lite', label: 'V2 Lite (16B)' },
+          { value: 'V2', label: 'V2 (236B)' },
+          { value: 'V2.5', label: 'V2.5 (236B)' },
+          { value: 'V3', label: 'V3 (685B)' },
+          { value: 'V3.2', label: 'V3.2 (685B)' }
+        ]
+      },
+      {
+        key: 'quantization', label: 'Quantization', default: 'auto',
+        options: [
+          { value: 'auto', label: 'Auto (BF16)' }
+        ]
+      },
+      {
+        key: 'reasoning_parser', label: 'Reasoning Parser', default: 'disabled',
+        options: [
+          { value: 'disabled', label: 'Disabled' },
+          { value: 'enabled', label: 'Enabled' }
+        ]
+      }
+    ],
+
+    gpuMem: { 'A100': 80, 'H800': 80, 'H200': 140 },
+
+    modelMem: {
+      'V2-Lite': 32, 'V2': 440, 'V2.5': 440,
+      'V3': 1300, 'V3.2': 1300
+    },
+
+    buildModelPath: function(sel) {
+      var map = {
+        'V2-Lite': 'deepseek-ai/DeepSeek-V2-Lite-Chat',
+        'V2': 'deepseek-ai/DeepSeek-V2-Chat',
+        'V2.5': 'deepseek-ai/DeepSeek-V2.5',
+        'V3': 'deepseek-ai/DeepSeek-V3',
+        'V3.2': 'deepseek-ai/DeepSeek-V3.2-Exp'
+      };
+      return map[sel.model_size] || 'deepseek-ai/DeepSeek-V3';
+    },
+
+    buildExtraFlags: function(sel) {
+      var flags = [];
+      flags.push('--backend pytorch');
+      if (sel.reasoning_parser === 'enabled') flags.push('--reasoning-parser deepseek-r1');
+      return flags;
+    }
+  };
+})();
diff --git a/docs/en/_static/js/models/glm4.js b/docs/en/_static/js/models/glm4.js
new file mode 100644
index 0000000000..719a795b42
--- /dev/null
+++ b/docs/en/_static/js/models/glm4.js
@@ -0,0 +1,70 @@
+// models/glm4.js — GLM-4 model configuration for LMDeploy Config Generator
+(function() {
+  'use strict';
+  window.LMDeployModelConfigs = window.LMDeployModelConfigs || {};
+
+  window.LMDeployModelConfigs['glm4'] = {
+    name: 'GLM-4',
+
+    dimensions: [
+      {
+        key: 'hardware', label: 'Hardware Platform', default: 'A100',
+        options: [
+          { value: 'A100', label: 'A100(80G)' },
+          { value: 'H800', label: 'H800(80G)' },
+          { value: 'H200', label: 'H200(140G)' 
} + ] + }, + { + key: 'model_size', label: 'Model Version', default: 'GLM-4-9B', + options: [ + { value: 'GLM-4-9B', label: 'GLM-4 (9B)' }, + { value: 'GLM-4-0414-9B', label: 'GLM-4-0414 (9B)' }, + { value: 'GLM-4.5-355B', label: 'GLM-4.5 (355B)' }, + { value: 'GLM-4.5-Air-106B', label: 'GLM-4.5-Air (106B)' }, + { value: 'GLM-4.7-Flash-30B', label: 'GLM-4.7-Flash (30B)' }, + { value: 'GLM-5-754B', label: 'GLM-5 (754B)' } + ] + }, + { + key: 'quantization', label: 'Quantization', default: 'auto', + options: [ + { value: 'auto', label: 'Auto (BF16)' }, + { value: 'awq', label: 'AWQ (W4A16)' } + ] + }, + { + key: 'category', label: 'Categories', default: 'chat', + options: [ + { value: 'chat', label: 'Chat' } + ] + } + ], + + gpuMem: { 'A100': 80, 'H800': 80, 'H200': 140 }, + + modelMem: { + 'GLM-4-9B': 18, 'GLM-4-0414-9B': 18, + 'GLM-4.5-355B': 700, 'GLM-4.5-Air-106B': 212, + 'GLM-4.7-Flash-30B': 60, 'GLM-5-754B': 1400 + }, + + buildModelPath: function(sel) { + var map = { + 'GLM-4-9B': 'THUDM/glm-4-9b-chat', + 'GLM-4-0414-9B': 'THUDM/GLM-4-0414-9B-Chat', + 'GLM-4.5-355B': 'THUDM/GLM-4.5-355B-Chat', + 'GLM-4.5-Air-106B': 'THUDM/GLM-4.5-Air-106B-Chat', + 'GLM-4.7-Flash-30B': 'THUDM/GLM-4.7-Flash-30B', + 'GLM-5-754B': 'THUDM/GLM-5-754B' + }; + return map[sel.model_size] || 'THUDM/glm-4-9b-chat'; + }, + + buildExtraFlags: function(sel) { + var flags = []; + if (sel.quantization === 'awq') flags.push('--model-format awq'); + return flags; + } + }; +})(); diff --git a/docs/en/_static/js/models/internlm.js b/docs/en/_static/js/models/internlm.js new file mode 100644 index 0000000000..b02b368568 --- /dev/null +++ b/docs/en/_static/js/models/internlm.js @@ -0,0 +1,93 @@ +// models/internlm.js — InternLM model configuration for LMDeploy Config Generator +(function() { + 'use strict'; + window.LMDeployModelConfigs = window.LMDeployModelConfigs || {}; + + window.LMDeployModelConfigs['internlm'] = { + name: 'InternLM', + + dimensions: [ + { + key: 'hardware', label: 'Hardware 
Platform', default: 'A100', + options: [ + { value: 'A100', label: 'A100(80G)' }, + { value: 'H800', label: 'H800(80G)' }, + { value: 'H200', label: 'H200(140G)' } + ] + }, + { + key: 'model_size', label: 'Model Version', default: 'InternLM3-8B', + options: [ + { value: 'InternLM2-7B', label: 'InternLM2 (7B)' }, + { value: 'InternLM2-20B', label: 'InternLM2 (20B)' }, + { value: 'InternLM2.5-7B', label: 'InternLM2.5 (7B)' }, + { value: 'InternLM3-8B', label: 'InternLM3 (8B)' } + ] + }, + { + key: 'quantization', label: 'Quantization', default: 'auto', + options: [ + { value: 'auto', label: 'Auto (BF16)' }, + { value: 'awq', label: 'AWQ (W4A16)' }, + { value: 'kv8', label: 'KV Cache INT8' } + ] + }, + { + key: 'category', label: 'Categories', default: 'chat', + options: [ + { value: 'base', label: 'Base' }, + { value: 'chat', label: 'Chat' } + ] + }, + { + key: 'reasoning_parser', label: 'Reasoning Parser', default: 'disabled', + options: [ + { value: 'disabled', label: 'Disabled' }, + { value: 'enabled', label: 'Enabled' } + ] + }, + { + key: 'tool_call_parser', label: 'Tool Call Parser', default: 'disabled', + options: [ + { value: 'disabled', label: 'Disabled' }, + { value: 'enabled', label: 'Enabled' } + ] + } + ], + + gpuMem: { 'A100': 80, 'H800': 80, 'H200': 140 }, + + modelMem: { + 'InternLM2-7B': 14, 'InternLM2-20B': 40, + 'InternLM2.5-7B': 14, 'InternLM3-8B': 16 + }, + + buildModelPath: function(sel) { + var chatMap = { + 'InternLM2-7B': 'internlm/internlm2-chat-7b', + 'InternLM2-20B': 'internlm/internlm2-chat-20b', + 'InternLM2.5-7B': 'internlm/internlm2_5-7b-chat', + 'InternLM3-8B': 'internlm/internlm3-8b-instruct' + }; + var baseMap = { + 'InternLM2-7B': 'internlm/internlm2-7b', + 'InternLM2-20B': 'internlm/internlm2-20b', + 'InternLM2.5-7B': 'internlm/internlm2_5-7b', + 'InternLM3-8B': 'internlm/internlm3-8b' + }; + if (sel.category === 'base') { + return baseMap[sel.model_size] || 'internlm/internlm3-8b'; + } + return chatMap[sel.model_size] || 
'internlm/internlm3-8b-instruct'; + }, + + buildExtraFlags: function(sel) { + var flags = []; + if (sel.quantization === 'awq') flags.push('--model-format awq'); + else if (sel.quantization === 'kv8') flags.push('--quant-policy 8'); + if (sel.reasoning_parser === 'enabled') flags.push('--reasoning-parser intern-s1'); + if (sel.tool_call_parser === 'enabled') flags.push('--tool-call-parser internlm'); + return flags; + } + }; +})(); diff --git a/docs/en/_static/js/models/qwen3.js b/docs/en/_static/js/models/qwen3.js new file mode 100644 index 0000000000..f167658933 --- /dev/null +++ b/docs/en/_static/js/models/qwen3.js @@ -0,0 +1,90 @@ +// models/qwen3.js — Qwen3 model configuration for LMDeploy Config Generator +(function() { + 'use strict'; + window.LMDeployModelConfigs = window.LMDeployModelConfigs || {}; + + window.LMDeployModelConfigs['qwen3'] = { + name: 'Qwen3', + + dimensions: [ + { + key: 'hardware', label: 'Hardware Platform', default: 'A100', + options: [ + { value: 'A100', label: 'A100(80G)' }, + { value: 'H800', label: 'H800(80G)' }, + { value: 'H200', label: 'H200(140G)' }, + { value: 'V100', label: 'V100(32G)' } + ] + }, + { + key: 'model_size', label: 'Model Size', default: '8B', + options: [ + { value: '235B-A22B', label: '235B MoE' }, + { value: '30B-A3B', label: '30B MoE' }, + { value: '32B', label: '32B' }, + { value: '14B', label: '14B' }, + { value: '8B', label: '8B' }, + { value: '4B', label: '4B' }, + { value: '1.7B', label: '1.7B' }, + { value: '0.6B', label: '0.6B' } + ] + }, + { + key: 'quantization', label: 'Quantization', default: 'auto', + options: [ + { value: 'auto', label: 'Auto' }, + { value: 'awq', label: 'AWQ (W4A16)' }, + { value: 'gptq', label: 'GPTQ (W4A16)' }, + { value: 'fp8', label: 'FP8' } + ] + }, + { + key: 'category', label: 'Categories', default: 'instruct', + options: [ + { value: 'base', label: 'Base' }, + { value: 'instruct', label: 'Instruct' }, + { value: 'thinking', label: 'Thinking' } + ] + }, + { + key: 
'reasoning_parser', label: 'Reasoning Parser', default: 'disabled', + options: [ + { value: 'disabled', label: 'Disabled' }, + { value: 'enabled', label: 'Enabled' } + ] + }, + { + key: 'tool_call_parser', label: 'Tool Call Parser', default: 'disabled', + options: [ + { value: 'disabled', label: 'Disabled' }, + { value: 'enabled', label: 'Enabled' } + ] + } + ], + + // GPU memory (GB) for TP estimation + gpuMem: { 'A100': 80, 'H800': 80, 'H200': 140, 'V100': 32 }, + + // Approximate BF16 model weight memory (GB) + modelMem: { + '235B-A22B': 440, '30B-A3B': 60, '32B': 64, + '14B': 28, '8B': 16, '4B': 8, '1.7B': 4, '0.6B': 2 + }, + + buildModelPath: function(sel) { + var base = 'Qwen/Qwen3-' + sel.model_size; + if (sel.category === 'instruct') base += '-Instruct'; + else if (sel.category === 'thinking') base += '-Thinking'; + return base; + }, + + buildExtraFlags: function(sel) { + var flags = []; + if (sel.quantization === 'awq') flags.push('--model-format awq'); + else if (sel.quantization === 'gptq') flags.push('--model-format gptq'); + if (sel.reasoning_parser === 'enabled') flags.push('--reasoning-parser qwen-qwq'); + if (sel.tool_call_parser === 'enabled') flags.push('--tool-call-parser qwen3'); + return flags; + } + }; +})(); diff --git a/docs/en/best_practice/deepseek/deepseek.md b/docs/en/best_practice/deepseek/deepseek.md new file mode 100644 index 0000000000..dfb238cc30 --- /dev/null +++ b/docs/en/best_practice/deepseek/deepseek.md @@ -0,0 +1,59 @@ +# DeepSeek + +## 1. Model Introduction + +DeepSeek is a series of powerful open-source large language models developed by DeepSeek AI. The series features Mixture-of-Experts (MoE) architecture for efficient inference with massive parameter counts. 
+ +| Model | Parameters | Architecture | +| :--------------: | :--------: | :----------: | +| DeepSeek-V2-Lite | 16B | MoE | +| DeepSeek-V2 | 236B | MoE | +| DeepSeek-V2.5 | 236B | MoE | +| DeepSeek-V3 | 685B | MoE | +| DeepSeek-V3.2 | 685B | MoE | + +Key features: + +- **MoE Architecture**: Efficient inference through sparse activation of expert modules. +- **Large-scale Models**: Up to 685B total parameters with efficient activated parameter counts. +- **Strong Reasoning**: Deep reasoning capabilities, especially with DeepSeek-R1 reasoning mode. + +For more details, please refer to the [DeepSeek GitHub Repository](https://github.com/deepseek-ai). + +## 2. Model Deployment + +### 2.1 Basic Configuration + +DeepSeek models are supported by LMDeploy with the PyTorch backend. Use the interactive generator below to create your deployment command. + +**Interactive Command Generator**: + +```{raw} html +
+``` + +### 2.2 Configuration Tips + +- **Backend**: DeepSeek models use the PyTorch backend (`--backend pytorch`). +- **Tensor Parallelism (`--tp`)**: DeepSeek-V3 (685B) requires at least 8×80G GPUs. Smaller models like V2-Lite (16B) can run on a single GPU. +- **Session Length (`--session-len`)**: Set explicitly to conserve memory, e.g., `--session-len 32768`. +- **Cache Management (`--cache-max-entry-count`)**: Lower this value if you encounter OOM errors. + +## 3. Model Invocation + +### 3.1 Basic Usage + +For basic API usage, please refer to: + +- [OpenAI Compatible Server](../../llm/api_server.md) +- [Pipeline (Offline Inference)](../../llm/pipeline.md) + +### 3.2 Reasoning Parser + +DeepSeek models support reasoning mode via the DeepSeek-R1 reasoning parser: + +```shell +lmdeploy serve api_server deepseek-ai/DeepSeek-V3 --backend pytorch --reasoning-parser deepseek-r1 +``` + +For detailed usage and examples, see [Reasoning Outputs](../../llm/api_server_reasoning.md). diff --git a/docs/en/best_practice/deepseek/index.rst b/docs/en/best_practice/deepseek/index.rst new file mode 100644 index 0000000000..f7306fcc8c --- /dev/null +++ b/docs/en/best_practice/deepseek/index.rst @@ -0,0 +1,7 @@ +DeepSeek +======================== + +.. toctree:: + :maxdepth: 1 + + deepseek.md diff --git a/docs/en/best_practice/glm/glm4.md b/docs/en/best_practice/glm/glm4.md new file mode 100644 index 0000000000..01cefbf693 --- /dev/null +++ b/docs/en/best_practice/glm/glm4.md @@ -0,0 +1,51 @@ +# GLM-4 + +## 1. Model Introduction + +GLM-4 is a series of large language models developed by Tsinghua University (THUDM). The series spans from compact 9B models to large-scale 754B models, offering strong multilingual and reasoning capabilities. 
+ +| Model | Parameters | Architecture | +| :-----------: | :--------: | :----------: | +| GLM-4 | 9B | Dense | +| GLM-4-0414 | 9B | Dense | +| GLM-4.5 | 355B | MoE | +| GLM-4.5-Air | 106B | MoE | +| GLM-4.7-Flash | 30B | Dense | +| GLM-5 | 754B | MoE | + +Key features: + +- **Scalable Architecture**: From 9B dense to 754B MoE models. +- **Strong Multilingual Support**: Excellent Chinese and English capabilities. +- **Tool Calling**: Built-in function calling support. +- **Vision-Language Models**: GLM-4V variants available for multimodal tasks. + +For more details, please refer to the [GLM GitHub Repository](https://github.com/THUDM). + +## 2. Model Deployment + +### 2.1 Basic Configuration + +GLM-4 models are supported by LMDeploy with both TurboMind (9B models) and PyTorch backends. Use the interactive generator below to create your deployment command. + +**Interactive Command Generator**: + +```{raw} html +
+``` + +### 2.2 Configuration Tips + +- **Backend Selection**: GLM-4 (9B) works with both TurboMind and PyTorch backends. Larger models (GLM-4.5, GLM-5) require the PyTorch backend. +- **Tensor Parallelism (`--tp`)**: GLM-4 (9B) can run on a single 80G GPU. GLM-4.5 (355B) requires multi-GPU setups. +- **Quantization**: AWQ quantization is supported for GLM-4 (9B) models on TurboMind backend. +- **Session Length (`--session-len`)**: Set explicitly to conserve memory, e.g., `--session-len 32768`. + +## 3. Model Invocation + +### 3.1 Basic Usage + +For basic API usage, please refer to: + +- [OpenAI Compatible Server](../../llm/api_server.md) +- [Pipeline (Offline Inference)](../../llm/pipeline.md) diff --git a/docs/en/best_practice/glm/index.rst b/docs/en/best_practice/glm/index.rst new file mode 100644 index 0000000000..e7e0452b77 --- /dev/null +++ b/docs/en/best_practice/glm/index.rst @@ -0,0 +1,7 @@ +GLM +======================== + +.. toctree:: + :maxdepth: 1 + + glm4.md diff --git a/docs/en/best_practice/internlm/index.rst b/docs/en/best_practice/internlm/index.rst new file mode 100644 index 0000000000..131e8f2e39 --- /dev/null +++ b/docs/en/best_practice/internlm/index.rst @@ -0,0 +1,7 @@ +InternLM +======================== + +.. toctree:: + :maxdepth: 1 + + internlm.md diff --git a/docs/en/best_practice/internlm/internlm.md b/docs/en/best_practice/internlm/internlm.md new file mode 100644 index 0000000000..407c0d4f25 --- /dev/null +++ b/docs/en/best_practice/internlm/internlm.md @@ -0,0 +1,69 @@ +# InternLM + +## 1. Model Introduction + +InternLM is a series of large language models developed by Shanghai AI Laboratory and SenseTime. The series spans multiple generations with progressive improvements in reasoning, code generation, and tool usage. 
+ +| Model | Parameters | Architecture | +| :------------: | :--------: | :----------: | +| InternLM2-7B | 7B | Dense | +| InternLM2-20B | 20B | Dense | +| InternLM2.5-7B | 7B | Dense | +| InternLM3-8B | 8B | Dense | + +Key features: + +- **Strong Reasoning**: Excellent performance on reasoning and math benchmarks. +- **Tool Calling**: Built-in function calling and agent capabilities. +- **Code Generation**: Strong code generation and understanding abilities. +- **Long Context**: Support for extended context windows. + +For more details, please refer to the [InternLM GitHub Repository](https://github.com/InternLM/InternLM). + +## 2. Model Deployment + +### 2.1 Basic Configuration + +InternLM models are fully supported by LMDeploy with both TurboMind and PyTorch backends. Use the interactive generator below to create your deployment command. + +**Interactive Command Generator**: + +```{raw} html +
+``` + +### 2.2 Configuration Tips + +- **Backend Selection**: TurboMind is the default high-performance backend. Use PyTorch backend (`--backend pytorch`) for broader compatibility. +- **Tensor Parallelism (`--tp`)**: InternLM2-20B may require 2 GPUs for BF16 inference. Smaller models (7B/8B) fit on a single GPU. +- **Quantization**: AWQ quantization (`--model-format awq`) and KV cache INT8 (`--quant-policy 8`) are supported. +- **Session Length (`--session-len`)**: Set explicitly to conserve memory, e.g., `--session-len 32768`. + +## 3. Model Invocation + +### 3.1 Basic Usage + +For basic API usage, please refer to: + +- [OpenAI Compatible Server](../../llm/api_server.md) +- [Pipeline (Offline Inference)](../../llm/pipeline.md) + +### 3.2 Reasoning Parser + +InternLM models support reasoning mode via the `intern-s1` reasoning parser: + +```shell +lmdeploy serve api_server internlm/internlm3-8b-instruct --reasoning-parser intern-s1 +``` + +For detailed usage and examples, see [Reasoning Outputs](../../llm/api_server_reasoning.md). + +### 3.3 Tool Calling + +InternLM supports tool calling capabilities. Enable the tool call parser: + +```shell +lmdeploy serve api_server internlm/internlm3-8b-instruct --tool-call-parser internlm +``` + +For detailed usage and examples, see [Tools](../../llm/api_server_tools.md). diff --git a/docs/en/best_practice/qwen/index.rst b/docs/en/best_practice/qwen/index.rst new file mode 100644 index 0000000000..2313cd58c3 --- /dev/null +++ b/docs/en/best_practice/qwen/index.rst @@ -0,0 +1,7 @@ +Qwen +======================== + +.. toctree:: + :maxdepth: 1 + + qwen3.md diff --git a/docs/en/best_practice/qwen/qwen3.md b/docs/en/best_practice/qwen/qwen3.md new file mode 100644 index 0000000000..faf96e771b --- /dev/null +++ b/docs/en/best_practice/qwen/qwen3.md @@ -0,0 +1,77 @@ +# Qwen3 + +## 1. 
Model Introduction + +Qwen3 is the latest generation of large language models in the Qwen series developed by Alibaba, offering significant improvements in instruction following, reasoning, multilingual understanding, and tool usage. + +The Qwen3 series provides models in both **Dense** and **MoE** (Mixture-of-Experts) architectures: + +| Model | Type | Parameters | Active Parameters | +| :-------------: | :---: | :--------: | :---------------: | +| Qwen3-0.6B | Dense | 0.6B | 0.6B | +| Qwen3-1.7B | Dense | 1.7B | 1.7B | +| Qwen3-4B | Dense | 4B | 4B | +| Qwen3-8B | Dense | 8B | 8B | +| Qwen3-14B | Dense | 14B | 14B | +| Qwen3-32B | Dense | 32B | 32B | +| Qwen3-30B-A3B | MoE | 30B | 3B | +| Qwen3-235B-A22B | MoE | 235B | 22B | + +Key features: + +- **Extended context length**: Up to 256K tokens for long-context understanding and reasoning. +- **Flexible deployment**: Available in Base, Instruct, and Thinking editions. +- **Tool calling**: Built-in support for function calling and agent workflows. +- **Multilingual**: Broad multilingual knowledge coverage. + +For more details, please refer to the [Qwen3 GitHub Repository](https://github.com/QwenLM/Qwen3). + +## 2. Model Deployment + +### 2.1 Basic Configuration + +The Qwen3 series is fully supported by LMDeploy with both TurboMind and PyTorch backends. Recommended launch configurations vary by hardware and model size. + +**Interactive Command Generator**: Use the configuration selector below to automatically generate the appropriate deployment command for your hardware platform, model size, quantization method, and capabilities. + +```{raw} html +
+``` + +### 2.2 Configuration Tips + +- **Backend Selection**: TurboMind is the default high-performance backend. Use PyTorch backend (`--backend pytorch`) for broader model format compatibility and features like LoRA adapters. +- **Tensor Parallelism (`--tp`)**: Set based on model size and available GPUs. Larger models (32B+) typically require multi-GPU setups. +- **KV Cache Memory (`--cache-max-entry-count`)**: Controls the percentage of free GPU memory used for KV cache (default: 0.8). Lower this value if you encounter OOM errors. +- **Session Length (`--session-len`)**: Defaults to model's max length. Set explicitly to conserve memory, e.g., `--session-len 32768`. +- **Prefix Caching (`--enable-prefix-caching`)**: Enables automatic prefix caching for improved throughput when serving repeated prompt patterns. +- **Quantization**: LMDeploy supports AWQ 4-bit (`--model-format awq`) and KV cache quantization (`--quant-policy 4` or `--quant-policy 8`) for Qwen3 models. + +## 3. Model Invocation + +### 3.1 Basic Usage + +For basic API usage, please refer to: + +- [OpenAI Compatible Server](../../llm/api_server.md) +- [Pipeline (Offline Inference)](../../llm/pipeline.md) + +### 3.2 Reasoning Parser + +Qwen3 Thinking models support reasoning mode. Enable the reasoning parser during deployment to separate the thinking and content sections: + +```shell +lmdeploy serve api_server Qwen/Qwen3-32B-Thinking --reasoning-parser qwen-qwq +``` + +For detailed usage and examples, see [Reasoning Outputs](../../llm/api_server_reasoning.md). + +### 3.3 Tool Calling + +Qwen3 supports tool calling capabilities. Enable the tool call parser: + +```shell +lmdeploy serve api_server Qwen/Qwen3-32B-Instruct --tool-call-parser qwen3 +``` + +For detailed usage and examples, see [Tools](../../llm/api_server_tools.md). 
diff --git a/docs/en/conf.py b/docs/en/conf.py index 94ca2a4def..6209c82bf3 100644 --- a/docs/en/conf.py +++ b/docs/en/conf.py @@ -174,7 +174,14 @@ def metrics(): # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] -html_css_files = ['css/readthedocs.css'] +html_css_files = ['css/readthedocs.css', 'css/config_generator.css'] +html_js_files = [ + 'js/models/qwen3.js', + 'js/models/deepseek.js', + 'js/models/glm4.js', + 'js/models/internlm.js', + 'js/config_generator.js', +] # Enable ::: for my_st myst_enable_extensions = [ diff --git a/docs/en/index.rst b/docs/en/index.rst index d1f78ecece..bc6b2cf420 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -51,6 +51,16 @@ Documentation supported_models/supported_models.md supported_models/reward_models.md +.. _best_practice: +.. toctree:: + :maxdepth: 3 + :caption: Deployment Best Practice + + best_practice/qwen/index + best_practice/deepseek/index + best_practice/glm/index + best_practice/internlm/index + .. _llm_deployment: .. toctree:: :maxdepth: 1