Skip to content

Commit

Permalink
Replace sanitize-html with dompurify.
Browse files Browse the repository at this point in the history
Turns out that sanitize-html is primarily meant for node, not the browser. In Vite, this results in warnings about externalized node builtins.

dompurify is also 8 kB while sanitize-html was 80 kB.

See: apostrophecms/sanitize-html#639
  • Loading branch information
raineorshine committed May 11, 2024
1 parent 283fc4b commit dc21366
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 42 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
"axios": "^1.6.8",
"classnames": "^2.3.1",
"clipboard": "^2.0.8",
"dompurify": "^3.1.2",
"emitter20": "^2.0.0",
"emoji-regex": "^10.2.1",
"fast-json-patch": "^3.0.0-1",
Expand Down Expand Up @@ -119,7 +120,6 @@
"redux-devtools-extension": "^2.13.9",
"redux-thunk": "^2.3.0",
"reselect": "^5.1.0",
"sanitize-html": "^2.4.0",
"text-block-parser": "^1.1.0",
"truncate-html": "^1.0.3",
"ts-key-enum": "^2.0.12",
Expand All @@ -141,6 +141,7 @@
"@trivago/prettier-plugin-sort-imports": "^4.2.0",
"@types/classnames": "^2.3.0",
"@types/clipboard": "^2.0.1",
"@types/dompurify": "^3.0.5",
"@types/expect-puppeteer": "^5.0.6",
"@types/html-escaper": "^3.0.0",
"@types/jest": "^26.0.23",
Expand Down
4 changes: 1 addition & 3 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,7 @@ export const ALLOWED_FORMATTING_TAGS = ['b', 'i', 'u', 'em', 'strong', 'span', '

export const ALLOWED_TAGS = ['ul', 'li', 'br', ...ALLOWED_FORMATTING_TAGS]

export const ALLOWED_ATTRIBUTES = {
span: ['class', 'style'],
}
export const ALLOWED_ATTR = ['class', 'style']

export const EMPTY_SPACE = ' '

Expand Down
30 changes: 28 additions & 2 deletions src/util/htmlToJson.ts
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,35 @@ const himalayaToBlock = (nodes: HimalayaNode[]): Block | Block[] => {

if (Array.isArray(blocks[0])) return blocks.flat()

// retrieve first chunk, if the first element is Block and the second is Block[], join children (Block[]) with parent (Block), else return blocks as is.
// retrieve first chunk, if the first element is Block and the second is Block[], join children (Block[]) with parent (Block).
const [first, rest] = blocks
const result = !Array.isArray(first) && Array.isArray(rest) ? joinChildren(blocks) : (blocks as Block[])
const result =
!Array.isArray(first) && Array.isArray(rest)
? joinChildren(blocks)
: /* It is still possible for blocks to contain nested arrays, so we must flatten them into Block[].
This occurs when a child comes after a <br> tag, such as in the following example.
e.g.
- a
- b
- c<br><span class="note">This is c!</span>
This should be imported as:
- a
- b
- c
- =note
- This is c!
*/
blocks.map(blockOrArray =>
Array.isArray(blockOrArray)
? {
scope: blockOrArray[0].scope,
children: blockOrArray.slice(1),
}
: blockOrArray,
)

return result
}
Expand Down
15 changes: 8 additions & 7 deletions src/util/strip.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import DOMPurify from 'dompurify'
import { HimalayaNode, parse } from 'himalaya'
import { unescape as unescapeHtml } from 'html-escaper'
import _ from 'lodash'
import sanitize from 'sanitize-html'
import { ALLOWED_ATTRIBUTES, ALLOWED_FORMATTING_TAGS } from '../constants'
import { ALLOWED_ATTR, ALLOWED_FORMATTING_TAGS } from '../constants'
import formattingNodeToHtml from './formattingNodeToHtml'
import isFormattingTag from './isFormattingTag'

Expand All @@ -23,15 +23,16 @@ const strip = (
.replace(/<\/p><p/g, '</p>\n<p') // <p> is a block element, if there is no newline between <p> tags add newline.
.replace(REGEX_BR_TAG, '\n') // Some text editors add <br> instead of \n
.replace(REGEX_SPAN_TAG_ONLY_CONTAINS_WHITESPACES, '$1') // Replace span tags contain whitespaces
.replace(REGEX_NBSP, ' ')
.replace(REGEX_DECIMAL_SPACE, ' ') // Some text editors use decimal code for space character
.replace(REGEX_EMPTY_FORMATTING_TAGS, '') // Remove empty formatting tags

const sanitizedHtml = unescapeHtml(
sanitize(replacedHtml, {
allowedTags: preserveFormatting ? ALLOWED_FORMATTING_TAGS : [],
allowedAttributes: ALLOWED_ATTRIBUTES,
}),
DOMPurify.sanitize(replacedHtml, {
ALLOWED_TAGS: preserveFormatting ? ALLOWED_FORMATTING_TAGS : [],
ALLOWED_ATTR,
})
// DOMPurify replaces spaces with &nbsp;, so we need to replace them after sanitizing rather than in the replacedHtml replacements above
.replace(REGEX_NBSP, ' '),
)

let finalHtml = sanitizedHtml
Expand Down
11 changes: 5 additions & 6 deletions src/util/textToHtml.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import DOMPurify from 'dompurify'
import _ from 'lodash'
import sanitize from 'sanitize-html'
import { parse } from 'text-block-parser'
import Block from '../@types/Block'
import { ALLOWED_ATTRIBUTES, ALLOWED_TAGS } from '../constants'
import { ALLOWED_ATTR, ALLOWED_TAGS } from '../constants'
import strip from '../util/strip'

const REGEX_CONTAINS_META_TAG = /^<(!doctype|meta)\s*.*?>/i
Expand Down Expand Up @@ -70,10 +70,9 @@ const isCopiedFromApp = (htmlText: string) => REGEX_CONTAINS_META_TAG.test(htmlT
const blocksToHtml = (parsedBlocks: Block[]): string =>
parsedBlocks
.map(block => {
const value = sanitize(block.scope.replace(REGEX_PLAINTEXT_BULLET, '').trim(), {
allowedTags: ALLOWED_TAGS,
allowedAttributes: ALLOWED_ATTRIBUTES,
disallowedTagsMode: 'recursiveEscape',
const value = DOMPurify.sanitize(block.scope.replace(REGEX_PLAINTEXT_BULLET, '').trim(), {
ALLOWED_TAGS,
ALLOWED_ATTR,
})
const childrenHtml = block.children.length > 0 ? `<ul>${blocksToHtml(block.children)}</ul>` : ''
return value || childrenHtml ? `<li>${value}${childrenHtml}</li>` : ''
Expand Down
41 changes: 18 additions & 23 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2628,6 +2628,13 @@
dependencies:
clipboard "*"

"@types/dompurify@^3.0.5":
version "3.0.5"
resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-3.0.5.tgz#02069a2fcb89a163bacf1a788f73cb415dd75cb7"
integrity sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==
dependencies:
"@types/trusted-types" "*"

"@types/[email protected]", "@types/estree@^1.0.0":
version "1.0.5"
resolved "https://registry.yarnpkg.com/@types/estree/-/estree-1.0.5.tgz#a6ce3e556e00fd9895dd872dd172ad0d4bd687f4"
Expand Down Expand Up @@ -2941,6 +2948,11 @@
dependencies:
"@types/jest" "*"

"@types/trusted-types@*":
version "2.0.7"
resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.7.tgz#baccb07a970b91707df3a3e8ba6896c57ead2d11"
integrity sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==

"@types/ua-parser-js@^0.7.33":
version "0.7.39"
resolved "https://registry.yarnpkg.com/@types/ua-parser-js/-/ua-parser-js-0.7.39.tgz#832c58e460c9435e4e34bb866e85e9146e12cdbb"
Expand Down Expand Up @@ -4812,6 +4824,11 @@ domhandler@^5.0.2, domhandler@^5.0.3:
dependencies:
domelementtype "^2.3.0"

dompurify@^3.1.2:
version "3.1.2"
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-3.1.2.tgz#d1e158457e00666ab40c9c3d8aab57586a072bd1"
integrity sha512-hLGGBI1tw5N8qTELr3blKjAML/LY4ANxksbS612UiJyDfyf/2D092Pvm+S7pmeTGJRqvlJkFzBoHBQKgQlOQVg==

domutils@^3.0.1:
version "3.1.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e"
Expand Down Expand Up @@ -6560,11 +6577,6 @@ is-plain-obj@^2.0.0:
resolved "https://registry.yarnpkg.com/is-plain-obj/-/is-plain-obj-2.1.0.tgz#45e42e37fccf1f40da8e5f76ee21515840c09287"
integrity sha512-YWnfyRwxL/+SsrWYfOpUtz5b3YD+nyfkHvjbcanzk8zgyO4ASD67uVMRt8k5bM4lLMDnXfriRhOpemw+NfT1eA==

is-plain-object@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344"
integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==

is-potential-custom-element-name@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz#171ed6f19e3ac554394edf78caa05784a45bebb5"
Expand Down Expand Up @@ -8434,11 +8446,6 @@ parse-json@^5.0.0, parse-json@^5.2.0:
json-parse-even-better-errors "^2.3.0"
lines-and-columns "^1.1.6"

parse-srcset@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1"
integrity sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==

parse5-htmlparser2-tree-adapter@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
Expand Down Expand Up @@ -8600,7 +8607,7 @@ possible-typed-array-names@^1.0.0:
resolved "https://registry.yarnpkg.com/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz#89bb63c6fada2c3e90adc4a647beeeb39cc7bf8f"
integrity sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==

postcss@^8.3.11, postcss@^8.4.38:
postcss@^8.4.38:
version "8.4.38"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.4.38.tgz#b387d533baf2054288e337066d81c6bee9db9e0e"
integrity sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==
Expand Down Expand Up @@ -9383,18 +9390,6 @@ safe-regex-test@^1.0.3:
resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a"
integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==

sanitize-html@^2.4.0:
version "2.13.0"
resolved "https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-2.13.0.tgz#71aedcdb777897985a4ea1877bf4f895a1170dae"
integrity sha512-Xff91Z+4Mz5QiNSLdLWwjgBDm5b1RU6xBT0+12rapjiaR7SwfRdjw8f+6Rir2MXKLrDicRFHdb51hGOAxmsUIA==
dependencies:
deepmerge "^4.2.2"
escape-string-regexp "^4.0.0"
htmlparser2 "^8.0.0"
is-plain-object "^5.0.0"
parse-srcset "^1.0.2"
postcss "^8.3.11"

[email protected]:
version "1.1.4"
resolved "https://registry.yarnpkg.com/sax/-/sax-1.1.4.tgz#74b6d33c9ae1e001510f179a91168588f1aedaa9"
Expand Down

0 comments on commit dc21366

Please sign in to comment.