Skip to content

Commit 2f85e05

Browse files
committed
Scrub body_data params too (e.g. POSTed JSON)
If we have `$c->req->body_data` - for e.g. the request was a POST with a JSON body which Catalyst has decoded into `$c->req->body_data` - then scrub HTML in there too (but applying the same `ignore_params` checks so that you can exempt certain JSON body params from scrubbing). Also moved the ignore_params tests into t/03_params.t, and added the tests for this new feature there too - don't need so many individual test apps, when most features can be tested with a single test app.
1 parent 6ac9a8e commit 2f85e05

File tree

6 files changed

+150
-119
lines changed

6 files changed

+150
-119
lines changed

lib/Catalyst/Plugin/HTML/Scrubber.pm

Lines changed: 70 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
package Catalyst::Plugin::HTML::Scrubber;
2-
2+
$Catalyst::Plugin::HTML::Scrubber::VERSION = '0.04';
33
use Moose;
44
use namespace::autoclean;
55

@@ -47,33 +47,80 @@ sub prepare_parameters {
4747
sub html_scrub {
4848
my ($c, $conf) = @_;
4949

50-
param:
51-
for my $param (keys %{ $c->request->{parameters} }) {
52-
#while (my ($param, $value) = each %{ $c->request->{parameters} }) {
53-
my $value = \$c->request->{parameters}{$param};
54-
if (ref $$value && ref $$value ne 'ARRAY') {
55-
next param;
56-
}
50+
# If there's body_data - for e.g. a POSTed JSON body that was decoded -
51+
# then we need to walk through it, scrubbing as appropriate
52+
if (my $body_data = $c->request->body_data) {
53+
$c->_scrub_recurse($conf, $c->request->body_data);
54+
}
55+
56+
# Normal query/POST body parameters:
57+
$c->_scrub_recurse($conf, $c->request->parameters);
58+
59+
}
60+
61+
# Recursively scrub param values...
62+
sub _scrub_recurse {
63+
my ($c, $conf, $data) = @_;
64+
65+
# If the thing we've got is a hashref, walk over its keys, checking
66+
# whether we should ignore, otherwise, do the needful
67+
if (ref $data eq 'HASH') {
68+
for my $key (keys %$data) {
69+
if (!$c->_should_scrub_param($conf, $key)) {
70+
next;
71+
}
5772

58-
# If we only want to operate on certain params, do that checking
59-
# now...
60-
if ($conf && $conf->{ignore_params}) {
61-
my $ignore_params = $c->config->{scrubber}{ignore_params};
62-
if (ref $ignore_params ne 'ARRAY') {
63-
$ignore_params = [ $ignore_params ];
73+
# OK, it's fine to fettle with this key - if its value is
74+
# a ref, recurse, otherwise, scrub
75+
if (my $ref = ref $data->{$key}) {
76+
$c->_scrub_recurse($conf, $data->{$key});
77+
} else {
78+
# Alright, non-ref value, so scrub it
79+
# FIXME why did we have to have this ref-ref handling fun?
80+
#$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
81+
$data->{$key} = $c->_scrubber->scrub($data->{$key});
6482
}
65-
for my $ignore_param (@$ignore_params) {
66-
if (ref $ignore_param eq 'Regexp') {
67-
next param if $param =~ $ignore_param;
68-
} else {
69-
next param if $param eq $ignore_param;
70-
}
83+
}
84+
} elsif (ref $data eq 'ARRAY') {
85+
# Simple - scrub all the values
86+
$_ = $c->_scrubber->scrub($_) for @$data;
87+
for (@$data) {
88+
if (ref $_) {
89+
$c->_scrub_recurse($conf, $_);
90+
} else {
91+
$_ = $c->_scrubber->scrub($_);
7192
}
72-
}
93+
}
94+
} elsif (ref $data eq 'CODE') {
95+
$c->log->debug("Can't scrub a coderef!");
96+
} else {
97+
# This shouldn't happen, as we should always start with a ref,
98+
# and non-ref hash/array values should have been handled above.
99+
$c->log->debug("Non-ref to scrub - should this happen?");
100+
}
101+
}
73102

74-
# If we're still here, we want to scrub this param's value.
75-
$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
103+
sub _should_scrub_param {
104+
my ($c, $conf, $param) = @_;
105+
# If we only want to operate on certain params, do that checking
106+
# now...
107+
if ($conf && $conf->{ignore_params}) {
108+
my $ignore_params = $c->config->{scrubber}{ignore_params};
109+
if (ref $ignore_params ne 'ARRAY') {
110+
$ignore_params = [ $ignore_params ];
111+
}
112+
for my $ignore_param (@$ignore_params) {
113+
if (ref $ignore_param eq 'Regexp') {
114+
return if $param =~ $ignore_param;
115+
} else {
116+
return if $param eq $ignore_param;
117+
}
118+
}
76119
}
120+
121+
# If we've not bailed above, we didn't match any ignore_params
122+
# entries, or didn't have any, so we do want to scrub
123+
return 1;
77124
}
78125

79126
__PACKAGE__->meta->make_immutable;

t/03_params.t

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,86 @@ use Test::More;
1919
my $req = POST('/', [foo => 'bar']);
2020
my ($res, $c) = ctx_request($req);
2121
ok($res->code == RC_OK, 'response ok');
22-
is($c->req->param('foo'), 'bar', 'parameter ok');
22+
is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
2323
}
2424
{
2525
my $req = POST('/', [foo => 'bar<script>alert("0");</script>']);
2626
my ($res, $c) = ctx_request($req);
2727
ok($res->code == RC_OK, 'response ok');
28-
is($c->req->param('foo'), 'bar');
28+
is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
2929
}
3030
{
31+
# we allow <b> in the test app config so this should not be stripped
3132
my $req = POST('/', [foo => '<b>bar</b>']);
3233
my ($res, $c) = ctx_request($req);
3334
ok($res->code == RC_OK, 'response ok');
34-
is($c->req->param('foo'), '<b>bar</b>', 'parameter ok');
35+
is($c->req->param('foo'), '<b>bar</b>', 'Allowed tag not stripped');
36+
}
37+
{
38+
diag "HTML left alone in ignored field - by regex match";
39+
my $value = '<h1>Bar</h1><p>Foo</p>';
40+
my $req = POST('/', [foo_html => $value]);
41+
my ($res, $c) = ctx_request($req);
42+
ok($res->code == RC_OK, 'response ok');
43+
is(
44+
$c->req->param('foo_html'),
45+
$value,
46+
'HTML left alone in ignored (by regex) field',
47+
);
48+
}
49+
{
50+
diag "HTML left alone in ignored field - by name";
51+
my $value = '<h1>Bar</h1><p>Foo</p>';
52+
my $req = POST('/', [ignored_param => $value]);
53+
my ($res, $c) = ctx_request($req);
54+
ok($res->code == RC_OK, 'response ok');
55+
is(
56+
$c->req->param('ignored_param'),
57+
$value,
58+
'HTML left alone in ignored (by name) field',
59+
);
60+
}
61+
62+
{
63+
# Test that data in a JSON body POSTed gets scrubbed too
64+
my $json_body = <<JSON;
65+
{
66+
"foo": "Top-level <img src=foo.jpg title=fun>",
67+
"baz":{
68+
"one":"Second-level <img src=test.jpg>"
69+
},
70+
"arr": [
71+
"one test <img src=arrtest1.jpg>",
72+
"two <script>window.alert('XSS!');</script>"
73+
],
74+
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
75+
}
76+
JSON
77+
my $req = POST('/',
78+
Content_Type => 'application/json', Content => $json_body
79+
);
80+
my ($res, $c) = ctx_request($req);
81+
ok($res->code == RC_OK, 'response ok');
82+
is(
83+
$c->req->body_data->{foo},
84+
'Top-level ', # note trailing space where img was removed
85+
'Top level body param scrubbed',
86+
);
87+
is(
88+
$c->req->body_data->{baz}{one},
89+
'Second-level ',
90+
'Second level body param scrubbed',
91+
);
92+
is(
93+
$c->req->body_data->{arr}[0],
94+
'one test ',
95+
'Second level array contents scrubbbed',
96+
);
97+
is(
98+
$c->req->body_data->{some_html},
99+
'Leave <b>this</b> alone: <img src=allowed.gif>',
100+
'Body data param matching ignore_params left alone',
101+
);
35102
}
36103

37104
done_testing();

t/05_ignore_params.t

Lines changed: 0 additions & 53 deletions
This file was deleted.

t/lib/MyApp03.pm

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,17 @@ extends 'Catalyst';
99

1010
__PACKAGE__->config(
1111
name => 'MyApp03',
12-
scrubber => [allow => [qw/br hr b/],]
12+
scrubber => {
1313

14+
auto => 1,
15+
16+
ignore_params => [ qr/_html$/, 'ignored_param' ],
17+
18+
# params for HTML::Scrubber
19+
params => [
20+
allow => [qw/br hr b/],
21+
],
22+
}
1423
);
1524
__PACKAGE__->setup();
1625

t/lib/MyApp05.pm

Lines changed: 0 additions & 22 deletions
This file was deleted.

t/lib/MyApp05/Controller/Root.pm

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)