@@ -14,14 +14,17 @@ Sanitizes the HTML input according to `whitelist`.
14
14
- `prettyprint`: Returns a prettier multiline string instead of a somewhat minified version.
15
15
"""
16
16
function sanitize (input:: AbstractString ; isfragment = true , whitelist = WHITELIST, prettyprint = false )
17
- doc = parsehtml (input)
17
+ input_preserve_ws = replace (input, r" (\s +)" s => s " 🐑\1 🐑 " )
18
+ doc = parsehtml (input_preserve_ws)
18
19
19
20
sanitize_bfs (doc. root, whitelist)
20
21
21
22
out = IOBuffer ()
22
23
print (out, doc. root, pretty = prettyprint)
23
24
24
25
out = String (take! (out))
26
+ out = replace (out, r" \s ?🐑(\s +)🐑\s ?" s => s "\1 " )
27
+
25
28
if isfragment
26
29
out = replace (out, r" ^<HTML>" => " " )
27
30
out = replace (out, r" </HTML>$" => " " )
@@ -30,7 +33,9 @@ function sanitize(input::AbstractString; isfragment = true, whitelist = WHITELIS
30
33
end
31
34
end
32
35
33
- reparent! (node, parent) = node. parent = parent
36
+ reparent! (_, _) = nothing  # generic fallback: ignores both arguments and returns `nothing`; the typed methods below take precedence for HTMLElement and HTMLText
37
+
38
+ reparent! (node:: HTMLElement , parent) = node. parent = parent  # stores `parent` in the element's `parent` field; the assignment expression also evaluates to `parent`
34
39
35
40
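# HTMLText is immutable, so its parent cannot be reassigned and this method is intentionally a no-op. A text node's parent may therefore become inconsistent with the rest of the tree, but ¯\_(ツ)_/¯.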
36
41
reparent! (node:: HTMLText , parent) = nothing  # deliberate no-op for text nodes: nothing is assigned and `nothing` is returned
@@ -70,7 +75,8 @@ function sanitize_element(el::HTMLElement{TAG}, whitelist) where TAG
70
75
return Gumbo. HTMLText (" " )
71
76
end
72
77
@debug (" Replacing `$(tag) ` with its contents." )
73
- return sanitize_element .(el. children, Ref (whitelist))
78
+ out = sanitize_element .(el. children, Ref (whitelist))
79
+ return isempty (out) ? Gumbo. HTMLText (" " ) : out
74
80
end
75
81
76
82
el = sanitize_attributes (el, whitelist)
0 commit comments